diff --git a/base/decoder_first.mlmodelc/analytics/coremldata.bin b/base/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e8f9c0af49dd006b774b87efdd21dab1d27852f --- /dev/null +++ b/base/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3071377562292da4d34bf9d0ddcfe168fd10c3b81d4689d25c207179d2d58578 +size 243 diff --git a/base/decoder_first.mlmodelc/coremldata.bin b/base/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cf1f7e086571f7d550b102f7789cd29797f552e --- /dev/null +++ b/base/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fbe1879a296cf22a0441826a3028ae2ec63bfc8e9ff019132681d2a93610324 +size 453 diff --git a/base/decoder_first.mlmodelc/metadata.json b/base/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..803aa42112f96bd42a9340d775d136f9f692eabb --- /dev/null +++ b/base/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 14, + "Shape" : 12, + "Ios18.linear" : 12, + "Identity" : 1, + "Ios18.gather" : 12, + "Ios18.concat" : 12, + "Ios18.sliceUpdate" : 14, + "Ios18.cast" : 24, + "Ios18.expandDims" : 12, + "Ios18.readState" : 14 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 448 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 448, 512]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 448 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 448, 512]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 1500 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 1500, 512]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 1500 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 1500, 512]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 512", + "shapeRange" : "[[1, 1], [1, 1500], [512, 512]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 512)", + "type" : "MultiArray", + "shape" : "[1, 1, 512]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/base/decoder_first.mlmodelc/model.mil b/base/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..0d10392624267a646ae587a1913bf492cccdee96 --- /dev/null +++ b/base/decoder_first.mlmodelc/model.mil @@ -0,0 +1,369 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 512]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [512, 512]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_79_to_fp16 = const()[name = string("op_79_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2752640)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3276992)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_79_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_83_to_fp16 = const()[name = string("op_83_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3278080)))]; + tensor var_84_to_fp16 = const()[name = string("op_84_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3802432)))]; + tensor linear_1_cast_fp16 = linear(bias = var_84_to_fp16, weight = var_83_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_86_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_86_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_86_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_86_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_86_shape_cast_fp16_to_int16 = cast(dtype = var_86_shape_cast_fp16_to_int16_dtype_0, x = var_86_shape_cast_fp16)[name = string("cast_43")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_86_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_42")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = k_cache2)[name = string("coreml_update_state_16")]; + tensor var_91_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_91_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_91_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_91_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_91_shape_cast_fp16_to_uint16 = cast(dtype = var_91_shape_cast_fp16_to_uint16_dtype_0, x = var_91_shape_cast_fp16)[name = string("cast_41")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_91_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_40")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = v_cache2)[name = string("coreml_update_state_17")]; + tensor var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3803520)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_113_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_117_to_fp16 = const()[name = string("op_117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4327872)))]; + tensor var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4852224)))]; + tensor linear_3_cast_fp16 = linear(bias = var_118_to_fp16, weight = var_117_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_120_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_120_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_120_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_120_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_120_shape_cast_fp16_to_uint16 = cast(dtype = var_120_shape_cast_fp16_to_uint16_dtype_0, x = var_120_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_120_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_38")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_16)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = k_cache2)[name = string("coreml_update_state_18")]; + tensor var_125_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_125_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_125_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_125_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_125_shape_cast_fp16_to_uint16 = cast(dtype = var_125_shape_cast_fp16_to_uint16_dtype_0, x = var_125_shape_cast_fp16)[name = string("cast_37")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_125_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_36")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_17)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = v_cache2)[name = string("coreml_update_state_19")]; + tensor var_147_to_fp16 = const()[name = string("op_147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4853312)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_147_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5377664)))]; + tensor var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5902016)))]; + tensor linear_5_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_154_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_154_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_154_shape_cast_fp16_to_uint16 = cast(dtype = var_154_shape_cast_fp16_to_uint16_dtype_0, x = var_154_shape_cast_fp16)[name = string("cast_35")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_154_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_34")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_18)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = k_cache2)[name = string("coreml_update_state_20")]; + tensor var_159_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_159_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_159_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_159_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_159_shape_cast_fp16_to_uint16 = cast(dtype = var_159_shape_cast_fp16_to_uint16_dtype_0, x = var_159_shape_cast_fp16)[name = string("cast_33")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_159_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_32")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_19)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = v_cache2)[name = string("coreml_update_state_21")]; + tensor var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5903104)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_181_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_185_to_fp16 = const()[name = string("op_185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6427456)))]; + tensor var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6951808)))]; + tensor linear_7_cast_fp16 = linear(bias = var_186_to_fp16, weight = var_185_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_188_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_188_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_188_shape_cast_fp16_to_uint16 = cast(dtype = var_188_shape_cast_fp16_to_uint16_dtype_0, x = var_188_shape_cast_fp16)[name = string("cast_31")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_188_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_30")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_20)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = k_cache2)[name = string("coreml_update_state_22")]; + tensor var_193_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_193_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_193_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_193_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_193_shape_cast_fp16_to_uint16 = cast(dtype = var_193_shape_cast_fp16_to_uint16_dtype_0, x = var_193_shape_cast_fp16)[name = string("cast_29")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_193_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_28")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_21)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = v_cache2)[name = string("coreml_update_state_23")]; + tensor var_215_to_fp16 = const()[name = string("op_215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6952896)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_215_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")]; + tensor var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7477248)))]; + tensor var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8001600)))]; + tensor linear_9_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")]; + tensor var_222_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_222_shape_cast_fp16")]; + int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)]; + int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)]; + bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)]; + string var_222_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_222_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)]; + tensor var_222_shape_cast_fp16_to_uint16 = cast(dtype = var_222_shape_cast_fp16_to_uint16_dtype_0, x = var_222_shape_cast_fp16)[name = string("cast_27")]; + uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_222_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")]; + string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor([0])]; + int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_26")]; + tensor expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([4, 0, 0, 0])]; + tensor concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor([0])]; + tensor concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor([0])]; + tensor concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor([0])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")]; + tensor k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_22)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = k_cache2)[name = string("coreml_update_state_24")]; + tensor var_227_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_227_shape_cast_fp16")]; + int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)]; + int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)]; + bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)]; + string var_227_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_227_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)]; + tensor var_227_shape_cast_fp16_to_uint16 = cast(dtype = var_227_shape_cast_fp16_to_uint16_dtype_0, x = var_227_shape_cast_fp16)[name = string("cast_25")]; + uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_227_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")]; + string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor([0])]; + int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_24")]; + tensor expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; + tensor concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor([0])]; + tensor concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor([0])]; + tensor concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor([0])]; + int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; + bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; + tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")]; + tensor v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_23)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = v_cache2)[name = string("coreml_update_state_25")]; + tensor var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8002688)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_249_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")]; + tensor var_253_to_fp16 = const()[name = string("op_253_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8527040)))]; + tensor var_254_to_fp16 = const()[name = string("op_254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9051392)))]; + tensor linear_11_cast_fp16 = linear(bias = var_254_to_fp16, weight = var_253_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")]; + tensor var_256_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_256_shape_cast_fp16")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_256_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_256_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)]; + tensor var_256_shape_cast_fp16_to_uint16 = cast(dtype = var_256_shape_cast_fp16_to_uint16_dtype_0, x = var_256_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_256_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_22")]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([5, 0, 0, 0])]; + tensor concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor([0])]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")]; + tensor k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_24)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_26_write_state")]; + tensor var_261_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_261_shape_cast_fp16")]; + int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)]; + int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)]; + bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)]; + string var_261_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_261_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)]; + tensor var_261_shape_cast_fp16_to_uint16 = cast(dtype = var_261_shape_cast_fp16_to_uint16_dtype_0, x = var_261_shape_cast_fp16)[name = string("cast_21")]; + uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_261_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")]; + string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor([0])]; + int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_20")]; + tensor expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([5, 0, 0, 0])]; + tensor concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor([0])]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")]; + tensor v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_25)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_27_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/base/decoder_first.mlmodelc/weights/weight.bin b/base/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a89c367089fa47f07e94e2937e6788cfa061906 --- /dev/null +++ b/base/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fdbcff86cdfe9e0b8842ad4bc1af8ebbf22082b1d0342a8304023f63dd3663f +size 9052480 diff --git a/base/decoder_second.mlmodelc/analytics/coremldata.bin b/base/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..85a8ab6e56764bfadaffbcb284f8d74e9b9094c2 --- /dev/null +++ b/base/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a3b3426587d83e56d3286cc0b733c9b8a5bff6b1ad6f9e1789a3cb55164455 +size 243 diff --git a/base/decoder_second.mlmodelc/coremldata.bin b/base/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..da712e28a355163fdbe3915834dcbfd38aad3b6d --- /dev/null +++ b/base/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c0272d581c200e0ab4f29c687e4a7b49152e241cea335fa6faa6a430a460b6 +size 487 diff --git a/base/decoder_second.mlmodelc/metadata.json b/base/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..daf955d1563d226d2c0b3d6145074d9fb2f6df38 --- /dev/null +++ b/base/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 49, + "Ios18.readState" : 14, + "Ios18.expandDims" : 7, + "Ios18.sub" : 1, + "Ios18.matmul" : 24, + "Ios18.gelu" : 6, + "Ios18.gather" : 9, + "Ios18.concat" : 32, + "Shape" : 8, + "Ios18.add" : 31, + "Ios18.sliceUpdate" : 24, + "Ios18.sliceByIndex" : 49, + "Ios18.layerNorm" : 19, + "Ios18.cast" : 16, + "Ios18.transpose" : 48, + "Ios18.writeState" : 12, + "Ios18.reshape" : 48, + "Ios18.softmax" : 12, + "Ios18.mul" : 24 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 448 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 448, 512]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 448 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 448, 512]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 1500 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 1500, 512]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 6 × 1 × 1500 × 512)", + "shortDescription" : "", + "shape" : "[6, 1, 1500, 512]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/base/decoder_second.mlmodelc/model.mil b/base/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..7ba4b7f3fac4bf29423197cd240f630d692f897c --- /dev/null +++ b/base/decoder_second.mlmodelc/model.mil @@ -0,0 +1,1228 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_26_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_26_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_26_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_26_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_26_shape_cast_fp16_to_int16 = cast(dtype = var_26_shape_cast_fp16_to_int16_dtype_0, x = var_26_shape_cast_fp16)[name = string("cast_82")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_26_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_30_shape = shape(x = token_data)[name = string("op_30_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_30_shape_to_uint16_dtype_0 = const()[name = string("op_30_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_30_shape_to_uint16 = cast(dtype = var_30_shape_to_uint16_dtype_0, x = var_30_shape)[name = string("cast_80")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_30_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_79")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_81")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_50_axis_0 = const()[name = string("op_50_axis_0"), val = int32(0)]; + int32 var_50_batch_dims_0 = const()[name = string("op_50_batch_dims_0"), val = int32(0)]; + bool var_50_validate_indices_0 = const()[name = string("op_50_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = token_data, validate_indices = var_50_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_50_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(512)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_53_end_mask_0 = const()[name = string("op_53_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53109888)))]; + tensor var_53_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_53_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_53_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_50_cast_fp16, y = var_53_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 512])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 512])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_76 = const()[name = string("op_76"), val = int32(-1)]; + tensor var_94_axes_0 = const()[name = string("op_94_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53568704)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53569792)))]; + fp16 var_82_to_fp16 = const()[name = string("op_82_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_94_cast_fp16 = layer_norm(axes = var_94_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_94_cast_fp16")]; + tensor var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53570880)))]; + tensor var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54095232)))]; + tensor linear_0_cast_fp16 = linear(bias = var_106_to_fp16, weight = var_105_to_fp16, x = var_94_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54096320)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54620672)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_109_to_fp16, x = var_94_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54621760)))]; + tensor var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55146112)))]; + tensor linear_2_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = var_94_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_116_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_116_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_78")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_77")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(512)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_132_begin_0 = const()[name = string("op_132_begin_0"), val = tensor([0, 0, 0])]; + tensor var_132_end_mask_0 = const()[name = string("op_132_end_mask_0"), val = tensor([true, false, true])]; + tensor var_132_cast_fp16 = slice_by_index(begin = var_132_begin_0, end = concat_10, end_mask = var_132_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_132_cast_fp16")]; + tensor var_135_begin_0 = const()[name = string("op_135_begin_0"), val = tensor([0, 0, 0])]; + tensor var_135_end_mask_0 = const()[name = string("op_135_end_mask_0"), val = tensor([true, false, true])]; + tensor var_135_cast_fp16 = slice_by_index(begin = var_135_begin_0, end = concat_10, end_mask = var_135_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_135_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 8, 64])]; + tensor var_145_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_145_cast_fp16")]; + tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_145_cast_fp16, y = const_30_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 8, 64])]; + tensor var_152_cast_fp16 = reshape(shape = concat_13x, x = var_132_cast_fp16)[name = string("op_152_cast_fp16")]; + tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_152_cast_fp16, y = const_31_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 8, 64])]; + tensor var_159_cast_fp16 = reshape(shape = concat_14x, x = var_135_cast_fp16)[name = string("op_159_cast_fp16")]; + tensor var_160 = const()[name = string("op_160"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = k_5_cast_fp16)[name = string("transpose_118")]; + tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = q_3_cast_fp16)[name = string("transpose_119")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_49, y = transpose_50)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_163_begin_0 = const()[name = string("op_163_begin_0"), val = tensor([0, 0])]; + tensor var_163_end_mask_0 = const()[name = string("op_163_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55147200)))]; + tensor var_163_cast_fp16 = slice_by_index(begin = var_163_begin_0, end = concat_15, end_mask = var_163_end_mask_0, x = mask_to_fp16)[name = string("op_163_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_164_begin_0 = const()[name = string("op_164_begin_0"), val = tensor([0, 0])]; + tensor var_164_end_mask_0 = const()[name = string("op_164_end_mask_0"), val = tensor([true, false])]; + tensor var_164_cast_fp16 = slice_by_index(begin = var_164_begin_0, end = concat_16, end_mask = var_164_end_mask_0, x = var_163_cast_fp16)[name = string("op_164_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_164_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_167_cast_fp16 = softmax(axis = var_76, x = qk_3_cast_fp16)[name = string("op_167_cast_fp16")]; + bool var_169_transpose_x_0 = const()[name = string("op_169_transpose_x_0"), val = bool(false)]; + bool var_169_transpose_y_0 = const()[name = string("op_169_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_160, x = var_159_cast_fp16)[name = string("transpose_120")]; + tensor var_169_cast_fp16 = matmul(transpose_x = var_169_transpose_x_0, transpose_y = var_169_transpose_y_0, x = var_167_cast_fp16, y = v_5_cast_fp16)[name = string("op_169_cast_fp16")]; + tensor var_170 = const()[name = string("op_170"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 512])]; + tensor var_171_cast_fp16 = transpose(perm = var_170, x = var_169_cast_fp16)[name = string("transpose_117")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_171_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_175_to_fp16 = const()[name = string("op_175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55548672)))]; + tensor var_176_to_fp16 = const()[name = string("op_176_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56073024)))]; + tensor linear_3_cast_fp16 = linear(bias = var_176_to_fp16, weight = var_175_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_183_axes_0 = const()[name = string("op_183_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56074112)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56075200)))]; + tensor var_183_cast_fp16 = layer_norm(axes = var_183_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_183_cast_fp16")]; + tensor var_192_to_fp16 = const()[name = string("op_192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56076288)))]; + tensor var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56600640)))]; + tensor linear_4_cast_fp16 = linear(bias = var_193_to_fp16, weight = var_192_to_fp16, x = var_183_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56601728)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 8, 64])]; + tensor var_213_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_213_cast_fp16")]; + tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_213_cast_fp16, y = const_32_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_219 = const()[name = string("op_219"), val = tensor([1, 1500, 8, -1])]; + tensor var_220_cast_fp16 = reshape(shape = var_219, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_220_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_220_cast_fp16, y = const_33_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_226 = const()[name = string("op_226"), val = tensor([1, 1500, 8, -1])]; + tensor var_227_cast_fp16 = reshape(shape = var_226, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_227_cast_fp16")]; + tensor var_228 = const()[name = string("op_228"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = k_9_cast_fp16)[name = string("transpose_114")]; + tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = q_7_cast_fp16)[name = string("transpose_115")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_51, y = transpose_52)[name = string("qk_5_cast_fp16")]; + tensor var_232_cast_fp16 = softmax(axis = var_76, x = qk_5_cast_fp16)[name = string("op_232_cast_fp16")]; + bool var_234_transpose_x_0 = const()[name = string("op_234_transpose_x_0"), val = bool(false)]; + bool var_234_transpose_y_0 = const()[name = string("op_234_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_228, x = var_227_cast_fp16)[name = string("transpose_116")]; + tensor var_234_cast_fp16 = matmul(transpose_x = var_234_transpose_x_0, transpose_y = var_234_transpose_y_0, x = var_232_cast_fp16, y = v_9_cast_fp16)[name = string("op_234_cast_fp16")]; + tensor var_235 = const()[name = string("op_235"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 512])]; + tensor var_236_cast_fp16 = transpose(perm = var_235, x = var_234_cast_fp16)[name = string("transpose_113")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_236_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_240_to_fp16 = const()[name = string("op_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58137792)))]; + tensor var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58662144)))]; + tensor linear_5_cast_fp16 = linear(bias = var_241_to_fp16, weight = var_240_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_248_axes_0 = const()[name = string("op_248_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58663232)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58664320)))]; + tensor var_248_cast_fp16 = layer_norm(axes = var_248_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_82_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_248_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58665408)))]; + tensor var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60762624)))]; + tensor linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_248_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60766784)))]; + tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62864000)))]; + tensor linear_7_cast_fp16 = linear(bias = var_264_to_fp16, weight = var_263_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 512])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 512])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_287 = const()[name = string("op_287"), val = int32(-1)]; + tensor var_305_axes_0 = const()[name = string("op_305_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62865088)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62866176)))]; + fp16 var_293_to_fp16 = const()[name = string("op_293_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_305_cast_fp16 = layer_norm(axes = var_305_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_305_cast_fp16")]; + tensor var_316_to_fp16 = const()[name = string("op_316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62867264)))]; + tensor var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63391616)))]; + tensor linear_8_cast_fp16 = linear(bias = var_317_to_fp16, weight = var_316_to_fp16, x = var_305_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63392704)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_320_to_fp16, x = var_305_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63917056)))]; + tensor var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64441408)))]; + tensor linear_10_cast_fp16 = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_305_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_327_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_327_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_327_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_327_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_327_shape_cast_fp16_to_uint16 = cast(dtype = var_327_shape_cast_fp16_to_uint16_dtype_0, x = var_327_shape_cast_fp16)[name = string("cast_76")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_327_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_75")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = k_cache1)[name = string("coreml_update_state_14")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = v_cache1)[name = string("coreml_update_state_15")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(512)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor([0, 0, 0])]; + tensor var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor([true, false, true])]; + tensor var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = concat_32, end_mask = var_343_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_346_begin_0 = const()[name = string("op_346_begin_0"), val = tensor([0, 0, 0])]; + tensor var_346_end_mask_0 = const()[name = string("op_346_end_mask_0"), val = tensor([true, false, true])]; + tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = concat_32, end_mask = var_346_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_346_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 8, 64])]; + tensor var_356_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_356_cast_fp16")]; + tensor const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_356_cast_fp16, y = const_34_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 8, 64])]; + tensor var_363_cast_fp16 = reshape(shape = concat_35x, x = var_343_cast_fp16)[name = string("op_363_cast_fp16")]; + tensor const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_363_cast_fp16, y = const_35_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 8, 64])]; + tensor var_370_cast_fp16 = reshape(shape = concat_36x, x = var_346_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor var_371 = const()[name = string("op_371"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = k_15_cast_fp16)[name = string("transpose_110")]; + tensor transpose_53 = transpose(perm = transpose_53_perm_0, x = q_11_cast_fp16)[name = string("transpose_111")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_53, y = transpose_54)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_374_begin_0 = const()[name = string("op_374_begin_0"), val = tensor([0, 0])]; + tensor var_374_end_mask_0 = const()[name = string("op_374_end_mask_0"), val = tensor([false, true])]; + tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = concat_37, end_mask = var_374_end_mask_0, x = mask_to_fp16)[name = string("op_374_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_375_begin_0 = const()[name = string("op_375_begin_0"), val = tensor([0, 0])]; + tensor var_375_end_mask_0 = const()[name = string("op_375_end_mask_0"), val = tensor([true, false])]; + tensor var_375_cast_fp16 = slice_by_index(begin = var_375_begin_0, end = concat_38, end_mask = var_375_end_mask_0, x = var_374_cast_fp16)[name = string("op_375_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_375_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_378_cast_fp16 = softmax(axis = var_287, x = qk_9_cast_fp16)[name = string("op_378_cast_fp16")]; + bool var_380_transpose_x_0 = const()[name = string("op_380_transpose_x_0"), val = bool(false)]; + bool var_380_transpose_y_0 = const()[name = string("op_380_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_112")]; + tensor var_380_cast_fp16 = matmul(transpose_x = var_380_transpose_x_0, transpose_y = var_380_transpose_y_0, x = var_378_cast_fp16, y = v_15_cast_fp16)[name = string("op_380_cast_fp16")]; + tensor var_381 = const()[name = string("op_381"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 512])]; + tensor var_382_cast_fp16 = transpose(perm = var_381, x = var_380_cast_fp16)[name = string("transpose_109")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_382_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64442496)))]; + tensor var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64966848)))]; + tensor linear_11_cast_fp16 = linear(bias = var_387_to_fp16, weight = var_386_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64967936)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64969024)))]; + tensor var_394_cast_fp16 = layer_norm(axes = var_394_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_394_cast_fp16")]; + tensor var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64970112)))]; + tensor var_404_to_fp16 = const()[name = string("op_404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65494464)))]; + tensor linear_12_cast_fp16 = linear(bias = var_404_to_fp16, weight = var_403_to_fp16, x = var_394_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 8, 64])]; + tensor var_424_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_424_cast_fp16")]; + tensor const_36_to_fp16 = const()[name = string("const_36_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_424_cast_fp16, y = const_36_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_430 = const()[name = string("op_430"), val = tensor([1, 1500, 8, -1])]; + tensor var_431_cast_fp16 = reshape(shape = var_430, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_431_cast_fp16")]; + tensor const_37_to_fp16 = const()[name = string("const_37_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_431_cast_fp16, y = const_37_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_437 = const()[name = string("op_437"), val = tensor([1, 1500, 8, -1])]; + tensor var_438_cast_fp16 = reshape(shape = var_437, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_438_cast_fp16")]; + tensor var_439 = const()[name = string("op_439"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_56 = transpose(perm = transpose_56_perm_0, x = k_19_cast_fp16)[name = string("transpose_106")]; + tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = q_15_cast_fp16)[name = string("transpose_107")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_55, y = transpose_56)[name = string("qk_11_cast_fp16")]; + tensor var_443_cast_fp16 = softmax(axis = var_287, x = qk_11_cast_fp16)[name = string("op_443_cast_fp16")]; + bool var_445_transpose_x_0 = const()[name = string("op_445_transpose_x_0"), val = bool(false)]; + bool var_445_transpose_y_0 = const()[name = string("op_445_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_439, x = var_438_cast_fp16)[name = string("transpose_108")]; + tensor var_445_cast_fp16 = matmul(transpose_x = var_445_transpose_x_0, transpose_y = var_445_transpose_y_0, x = var_443_cast_fp16, y = v_19_cast_fp16)[name = string("op_445_cast_fp16")]; + tensor var_446 = const()[name = string("op_446"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 512])]; + tensor var_447_cast_fp16 = transpose(perm = var_446, x = var_445_cast_fp16)[name = string("transpose_105")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_447_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65495552)))]; + tensor var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66019904)))]; + tensor linear_13_cast_fp16 = linear(bias = var_452_to_fp16, weight = var_451_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_459_axes_0 = const()[name = string("op_459_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66020992)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66022080)))]; + tensor var_459_cast_fp16 = layer_norm(axes = var_459_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_293_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_459_cast_fp16")]; + tensor var_468_to_fp16 = const()[name = string("op_468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66023168)))]; + tensor var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68120384)))]; + tensor linear_14_cast_fp16 = linear(bias = var_469_to_fp16, weight = var_468_to_fp16, x = var_459_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68124544)))]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70221760)))]; + tensor linear_15_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 512])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_14)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 512])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_15)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 512])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 512])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_498 = const()[name = string("op_498"), val = int32(-1)]; + tensor var_516_axes_0 = const()[name = string("op_516_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70222848)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70223936)))]; + fp16 var_504_to_fp16 = const()[name = string("op_504_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_516_cast_fp16 = layer_norm(axes = var_516_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_516_cast_fp16")]; + tensor var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70225024)))]; + tensor var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70749376)))]; + tensor linear_16_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = var_516_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70750464)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_531_to_fp16, x = var_516_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71274816)))]; + tensor var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71799168)))]; + tensor linear_18_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_516_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_538_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_538_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_538_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_538_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_538_shape_cast_fp16_to_uint16 = cast(dtype = var_538_shape_cast_fp16_to_uint16_dtype_0, x = var_538_shape_cast_fp16)[name = string("cast_74")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_538_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_73")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_14)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = k_cache1)[name = string("coreml_update_state_16")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_15)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = v_cache1)[name = string("coreml_update_state_17")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(512)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_554_begin_0 = const()[name = string("op_554_begin_0"), val = tensor([0, 0, 0])]; + tensor var_554_end_mask_0 = const()[name = string("op_554_end_mask_0"), val = tensor([true, false, true])]; + tensor var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = concat_54, end_mask = var_554_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_554_cast_fp16")]; + tensor var_557_begin_0 = const()[name = string("op_557_begin_0"), val = tensor([0, 0, 0])]; + tensor var_557_end_mask_0 = const()[name = string("op_557_end_mask_0"), val = tensor([true, false, true])]; + tensor var_557_cast_fp16 = slice_by_index(begin = var_557_begin_0, end = concat_54, end_mask = var_557_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_557_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 8, 64])]; + tensor var_567_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_567_cast_fp16")]; + tensor const_38_to_fp16 = const()[name = string("const_38_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_567_cast_fp16, y = const_38_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 8, 64])]; + tensor var_574_cast_fp16 = reshape(shape = concat_57x, x = var_554_cast_fp16)[name = string("op_574_cast_fp16")]; + tensor const_39_to_fp16 = const()[name = string("const_39_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_574_cast_fp16, y = const_39_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 8, 64])]; + tensor var_581_cast_fp16 = reshape(shape = concat_58x, x = var_557_cast_fp16)[name = string("op_581_cast_fp16")]; + tensor var_582 = const()[name = string("op_582"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = k_25_cast_fp16)[name = string("transpose_102")]; + tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = q_19_cast_fp16)[name = string("transpose_103")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_57, y = transpose_58)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_585_begin_0 = const()[name = string("op_585_begin_0"), val = tensor([0, 0])]; + tensor var_585_end_mask_0 = const()[name = string("op_585_end_mask_0"), val = tensor([false, true])]; + tensor var_585_cast_fp16 = slice_by_index(begin = var_585_begin_0, end = concat_59, end_mask = var_585_end_mask_0, x = mask_to_fp16)[name = string("op_585_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor([0, 0])]; + tensor var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor([true, false])]; + tensor var_586_cast_fp16 = slice_by_index(begin = var_586_begin_0, end = concat_60, end_mask = var_586_end_mask_0, x = var_585_cast_fp16)[name = string("op_586_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_586_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_589_cast_fp16 = softmax(axis = var_498, x = qk_15_cast_fp16)[name = string("op_589_cast_fp16")]; + bool var_591_transpose_x_0 = const()[name = string("op_591_transpose_x_0"), val = bool(false)]; + bool var_591_transpose_y_0 = const()[name = string("op_591_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_582, x = var_581_cast_fp16)[name = string("transpose_104")]; + tensor var_591_cast_fp16 = matmul(transpose_x = var_591_transpose_x_0, transpose_y = var_591_transpose_y_0, x = var_589_cast_fp16, y = v_25_cast_fp16)[name = string("op_591_cast_fp16")]; + tensor var_592 = const()[name = string("op_592"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 512])]; + tensor var_593_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_101")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_593_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71800256)))]; + tensor var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72324608)))]; + tensor linear_19_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_605_axes_0 = const()[name = string("op_605_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72325696)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72326784)))]; + tensor var_605_cast_fp16 = layer_norm(axes = var_605_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_605_cast_fp16")]; + tensor var_614_to_fp16 = const()[name = string("op_614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72327872)))]; + tensor var_615_to_fp16 = const()[name = string("op_615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72852224)))]; + tensor linear_20_cast_fp16 = linear(bias = var_615_to_fp16, weight = var_614_to_fp16, x = var_605_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 8, 64])]; + tensor var_635_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_635_cast_fp16")]; + tensor const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_635_cast_fp16, y = const_40_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_641 = const()[name = string("op_641"), val = tensor([1, 1500, 8, -1])]; + tensor var_642_cast_fp16 = reshape(shape = var_641, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_642_cast_fp16")]; + tensor const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_642_cast_fp16, y = const_41_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_648 = const()[name = string("op_648"), val = tensor([1, 1500, 8, -1])]; + tensor var_649_cast_fp16 = reshape(shape = var_648, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_649_cast_fp16")]; + tensor var_650 = const()[name = string("op_650"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = k_29_cast_fp16)[name = string("transpose_98")]; + tensor transpose_59 = transpose(perm = transpose_59_perm_0, x = q_23_cast_fp16)[name = string("transpose_99")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_59, y = transpose_60)[name = string("qk_17_cast_fp16")]; + tensor var_654_cast_fp16 = softmax(axis = var_498, x = qk_17_cast_fp16)[name = string("op_654_cast_fp16")]; + bool var_656_transpose_x_0 = const()[name = string("op_656_transpose_x_0"), val = bool(false)]; + bool var_656_transpose_y_0 = const()[name = string("op_656_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_650, x = var_649_cast_fp16)[name = string("transpose_100")]; + tensor var_656_cast_fp16 = matmul(transpose_x = var_656_transpose_x_0, transpose_y = var_656_transpose_y_0, x = var_654_cast_fp16, y = v_29_cast_fp16)[name = string("op_656_cast_fp16")]; + tensor var_657 = const()[name = string("op_657"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 512])]; + tensor var_658_cast_fp16 = transpose(perm = var_657, x = var_656_cast_fp16)[name = string("transpose_97")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_658_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_662_to_fp16 = const()[name = string("op_662_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72853312)))]; + tensor var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73377664)))]; + tensor linear_21_cast_fp16 = linear(bias = var_663_to_fp16, weight = var_662_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73378752)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73379840)))]; + tensor var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_504_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_670_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73380928)))]; + tensor var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75478144)))]; + tensor linear_22_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = var_670_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75482304)))]; + tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77579520)))]; + tensor linear_23_cast_fp16 = linear(bias = var_686_to_fp16, weight = var_685_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 512])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_16)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 512])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_17)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 512])]; + tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; + tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 512])]; + tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; + int32 var_709 = const()[name = string("op_709"), val = int32(-1)]; + tensor var_727_axes_0 = const()[name = string("op_727_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77580608)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77581696)))]; + fp16 var_715_to_fp16 = const()[name = string("op_715_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_727_cast_fp16 = layer_norm(axes = var_727_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_727_cast_fp16")]; + tensor var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77582784)))]; + tensor var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78107136)))]; + tensor linear_24_cast_fp16 = linear(bias = var_739_to_fp16, weight = var_738_to_fp16, x = var_727_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78108224)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_742_to_fp16, x = var_727_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_746_to_fp16 = const()[name = string("op_746_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78632576)))]; + tensor var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79156928)))]; + tensor linear_26_cast_fp16 = linear(bias = var_747_to_fp16, weight = var_746_to_fp16, x = var_727_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_749_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_749_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_749_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_749_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_749_shape_cast_fp16_to_uint16 = cast(dtype = var_749_shape_cast_fp16_to_uint16_dtype_0, x = var_749_shape_cast_fp16)[name = string("cast_72")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_749_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_71")]; + int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_16)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = k_cache1)[name = string("coreml_update_state_18")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_17)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = v_cache1)[name = string("coreml_update_state_19")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(512)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; + tensor var_765_begin_0 = const()[name = string("op_765_begin_0"), val = tensor([0, 0, 0])]; + tensor var_765_end_mask_0 = const()[name = string("op_765_end_mask_0"), val = tensor([true, false, true])]; + tensor var_765_cast_fp16 = slice_by_index(begin = var_765_begin_0, end = concat_76, end_mask = var_765_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_765_cast_fp16")]; + tensor var_768_begin_0 = const()[name = string("op_768_begin_0"), val = tensor([0, 0, 0])]; + tensor var_768_end_mask_0 = const()[name = string("op_768_end_mask_0"), val = tensor([true, false, true])]; + tensor var_768_cast_fp16 = slice_by_index(begin = var_768_begin_0, end = concat_76, end_mask = var_768_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_768_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 8, 64])]; + tensor var_778_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_778_cast_fp16")]; + tensor const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_778_cast_fp16, y = const_42_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 8, 64])]; + tensor var_785_cast_fp16 = reshape(shape = concat_79x, x = var_765_cast_fp16)[name = string("op_785_cast_fp16")]; + tensor const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_785_cast_fp16, y = const_43_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 8, 64])]; + tensor var_792_cast_fp16 = reshape(shape = concat_80x, x = var_768_cast_fp16)[name = string("op_792_cast_fp16")]; + tensor var_793 = const()[name = string("op_793"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_62 = transpose(perm = transpose_62_perm_0, x = k_35_cast_fp16)[name = string("transpose_94")]; + tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = q_27_cast_fp16)[name = string("transpose_95")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_61, y = transpose_62)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_796_begin_0 = const()[name = string("op_796_begin_0"), val = tensor([0, 0])]; + tensor var_796_end_mask_0 = const()[name = string("op_796_end_mask_0"), val = tensor([false, true])]; + tensor var_796_cast_fp16 = slice_by_index(begin = var_796_begin_0, end = concat_81, end_mask = var_796_end_mask_0, x = mask_to_fp16)[name = string("op_796_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_797_begin_0 = const()[name = string("op_797_begin_0"), val = tensor([0, 0])]; + tensor var_797_end_mask_0 = const()[name = string("op_797_end_mask_0"), val = tensor([true, false])]; + tensor var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = concat_82, end_mask = var_797_end_mask_0, x = var_796_cast_fp16)[name = string("op_797_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_797_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_800_cast_fp16 = softmax(axis = var_709, x = qk_21_cast_fp16)[name = string("op_800_cast_fp16")]; + bool var_802_transpose_x_0 = const()[name = string("op_802_transpose_x_0"), val = bool(false)]; + bool var_802_transpose_y_0 = const()[name = string("op_802_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_793, x = var_792_cast_fp16)[name = string("transpose_96")]; + tensor var_802_cast_fp16 = matmul(transpose_x = var_802_transpose_x_0, transpose_y = var_802_transpose_y_0, x = var_800_cast_fp16, y = v_35_cast_fp16)[name = string("op_802_cast_fp16")]; + tensor var_803 = const()[name = string("op_803"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 512])]; + tensor var_804_cast_fp16 = transpose(perm = var_803, x = var_802_cast_fp16)[name = string("transpose_93")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_804_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_808_to_fp16 = const()[name = string("op_808_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79158016)))]; + tensor var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79682368)))]; + tensor linear_27_cast_fp16 = linear(bias = var_809_to_fp16, weight = var_808_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_816_axes_0 = const()[name = string("op_816_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79683456)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79684544)))]; + tensor var_816_cast_fp16 = layer_norm(axes = var_816_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_816_cast_fp16")]; + tensor var_825_to_fp16 = const()[name = string("op_825_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79685632)))]; + tensor var_826_to_fp16 = const()[name = string("op_826_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80209984)))]; + tensor linear_28_cast_fp16 = linear(bias = var_826_to_fp16, weight = var_825_to_fp16, x = var_816_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 8, 64])]; + tensor var_846_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_846_cast_fp16, y = const_44_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_852 = const()[name = string("op_852"), val = tensor([1, 1500, 8, -1])]; + tensor var_853_cast_fp16 = reshape(shape = var_852, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_853_cast_fp16")]; + tensor const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_853_cast_fp16, y = const_45_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_859 = const()[name = string("op_859"), val = tensor([1, 1500, 8, -1])]; + tensor var_860_cast_fp16 = reshape(shape = var_859, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_860_cast_fp16")]; + tensor var_861 = const()[name = string("op_861"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = k_39_cast_fp16)[name = string("transpose_90")]; + tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = q_31_cast_fp16)[name = string("transpose_91")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_63, y = transpose_64)[name = string("qk_23_cast_fp16")]; + tensor var_865_cast_fp16 = softmax(axis = var_709, x = qk_23_cast_fp16)[name = string("op_865_cast_fp16")]; + bool var_867_transpose_x_0 = const()[name = string("op_867_transpose_x_0"), val = bool(false)]; + bool var_867_transpose_y_0 = const()[name = string("op_867_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_861, x = var_860_cast_fp16)[name = string("transpose_92")]; + tensor var_867_cast_fp16 = matmul(transpose_x = var_867_transpose_x_0, transpose_y = var_867_transpose_y_0, x = var_865_cast_fp16, y = v_39_cast_fp16)[name = string("op_867_cast_fp16")]; + tensor var_868 = const()[name = string("op_868"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 512])]; + tensor var_869_cast_fp16 = transpose(perm = var_868, x = var_867_cast_fp16)[name = string("transpose_89")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_869_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_873_to_fp16 = const()[name = string("op_873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80211072)))]; + tensor var_874_to_fp16 = const()[name = string("op_874_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80735424)))]; + tensor linear_29_cast_fp16 = linear(bias = var_874_to_fp16, weight = var_873_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_881_axes_0 = const()[name = string("op_881_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80736512)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80737600)))]; + tensor var_881_cast_fp16 = layer_norm(axes = var_881_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_715_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_881_cast_fp16")]; + tensor var_890_to_fp16 = const()[name = string("op_890_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80738688)))]; + tensor var_891_to_fp16 = const()[name = string("op_891_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82835904)))]; + tensor linear_30_cast_fp16 = linear(bias = var_891_to_fp16, weight = var_890_to_fp16, x = var_881_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_896_to_fp16 = const()[name = string("op_896_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82840064)))]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84937280)))]; + tensor linear_31_cast_fp16 = linear(bias = var_897_to_fp16, weight = var_896_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 512])]; + tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_18)[name = string("k_cache_17_cast_fp16")]; + tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 512])]; + tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_19)[name = string("v_cache_17_cast_fp16")]; + tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 512])]; + tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; + tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 512])]; + tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; + int32 var_920 = const()[name = string("op_920"), val = int32(-1)]; + tensor var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84938368)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84939456)))]; + fp16 var_926_to_fp16 = const()[name = string("op_926_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_938_cast_fp16 = layer_norm(axes = var_938_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_938_cast_fp16")]; + tensor var_949_to_fp16 = const()[name = string("op_949_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84940544)))]; + tensor var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85464896)))]; + tensor linear_32_cast_fp16 = linear(bias = var_950_to_fp16, weight = var_949_to_fp16, x = var_938_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_953_to_fp16 = const()[name = string("op_953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85465984)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_953_to_fp16, x = var_938_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor var_957_to_fp16 = const()[name = string("op_957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85990336)))]; + tensor var_958_to_fp16 = const()[name = string("op_958_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86514688)))]; + tensor linear_34_cast_fp16 = linear(bias = var_958_to_fp16, weight = var_957_to_fp16, x = var_938_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor var_960_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_960_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_960_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_960_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_960_shape_cast_fp16_to_uint16 = cast(dtype = var_960_shape_cast_fp16_to_uint16_dtype_0, x = var_960_shape_cast_fp16)[name = string("cast_70")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_960_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_69")]; + int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; + tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; + tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_18)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = k_cache1)[name = string("coreml_update_state_20")]; + tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_19)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = v_cache1)[name = string("coreml_update_state_21")]; + int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; + int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(512)]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; + tensor var_976_begin_0 = const()[name = string("op_976_begin_0"), val = tensor([0, 0, 0])]; + tensor var_976_end_mask_0 = const()[name = string("op_976_end_mask_0"), val = tensor([true, false, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = concat_98, end_mask = var_976_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_976_cast_fp16")]; + tensor var_979_begin_0 = const()[name = string("op_979_begin_0"), val = tensor([0, 0, 0])]; + tensor var_979_end_mask_0 = const()[name = string("op_979_end_mask_0"), val = tensor([true, false, true])]; + tensor var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = concat_98, end_mask = var_979_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_979_cast_fp16")]; + tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 8, 64])]; + tensor var_989_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_989_cast_fp16")]; + tensor const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_989_cast_fp16, y = const_46_to_fp16)[name = string("q_35_cast_fp16")]; + tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 8, 64])]; + tensor var_996_cast_fp16 = reshape(shape = concat_101x, x = var_976_cast_fp16)[name = string("op_996_cast_fp16")]; + tensor const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_45_cast_fp16 = mul(x = var_996_cast_fp16, y = const_47_to_fp16)[name = string("k_45_cast_fp16")]; + tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 8, 64])]; + tensor var_1003_cast_fp16 = reshape(shape = concat_102x, x = var_979_cast_fp16)[name = string("op_1003_cast_fp16")]; + tensor var_1004 = const()[name = string("op_1004"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = k_45_cast_fp16)[name = string("transpose_86")]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = q_35_cast_fp16)[name = string("transpose_87")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_65, y = transpose_66)[name = string("qk_25_cast_fp16")]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; + tensor var_1007_begin_0 = const()[name = string("op_1007_begin_0"), val = tensor([0, 0])]; + tensor var_1007_end_mask_0 = const()[name = string("op_1007_end_mask_0"), val = tensor([false, true])]; + tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = concat_103, end_mask = var_1007_end_mask_0, x = mask_to_fp16)[name = string("op_1007_cast_fp16")]; + int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; + tensor var_1008_begin_0 = const()[name = string("op_1008_begin_0"), val = tensor([0, 0])]; + tensor var_1008_end_mask_0 = const()[name = string("op_1008_end_mask_0"), val = tensor([true, false])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = concat_104, end_mask = var_1008_end_mask_0, x = var_1007_cast_fp16)[name = string("op_1008_cast_fp16")]; + tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1008_cast_fp16)[name = string("qk_27_cast_fp16")]; + tensor var_1011_cast_fp16 = softmax(axis = var_920, x = qk_27_cast_fp16)[name = string("op_1011_cast_fp16")]; + bool var_1013_transpose_x_0 = const()[name = string("op_1013_transpose_x_0"), val = bool(false)]; + bool var_1013_transpose_y_0 = const()[name = string("op_1013_transpose_y_0"), val = bool(false)]; + tensor v_45_cast_fp16 = transpose(perm = var_1004, x = var_1003_cast_fp16)[name = string("transpose_88")]; + tensor var_1013_cast_fp16 = matmul(transpose_x = var_1013_transpose_x_0, transpose_y = var_1013_transpose_y_0, x = var_1011_cast_fp16, y = v_45_cast_fp16)[name = string("op_1013_cast_fp16")]; + tensor var_1014 = const()[name = string("op_1014"), val = tensor([0, 2, 1, 3])]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 512])]; + tensor var_1015_cast_fp16 = transpose(perm = var_1014, x = var_1013_cast_fp16)[name = string("transpose_85")]; + tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1015_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86515776)))]; + tensor var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87040128)))]; + tensor linear_35_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; + tensor var_1027_axes_0 = const()[name = string("op_1027_axes_0"), val = tensor([-1])]; + tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87041216)))]; + tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87042304)))]; + tensor var_1027_cast_fp16 = layer_norm(axes = var_1027_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor var_1036_to_fp16 = const()[name = string("op_1036_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87043392)))]; + tensor var_1037_to_fp16 = const()[name = string("op_1037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87567744)))]; + tensor linear_36_cast_fp16 = linear(bias = var_1037_to_fp16, weight = var_1036_to_fp16, x = var_1027_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; + tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; + tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; + tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 8, 64])]; + tensor var_1057_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1057_cast_fp16")]; + tensor const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1057_cast_fp16, y = const_48_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1063 = const()[name = string("op_1063"), val = tensor([1, 1500, 8, -1])]; + tensor var_1064_cast_fp16 = reshape(shape = var_1063, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1064_cast_fp16")]; + tensor const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_49_cast_fp16 = mul(x = var_1064_cast_fp16, y = const_49_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_1070 = const()[name = string("op_1070"), val = tensor([1, 1500, 8, -1])]; + tensor var_1071_cast_fp16 = reshape(shape = var_1070, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1071_cast_fp16")]; + tensor var_1072 = const()[name = string("op_1072"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = k_49_cast_fp16)[name = string("transpose_82")]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = q_39_cast_fp16)[name = string("transpose_83")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_67, y = transpose_68)[name = string("qk_29_cast_fp16")]; + tensor var_1076_cast_fp16 = softmax(axis = var_920, x = qk_29_cast_fp16)[name = string("op_1076_cast_fp16")]; + bool var_1078_transpose_x_0 = const()[name = string("op_1078_transpose_x_0"), val = bool(false)]; + bool var_1078_transpose_y_0 = const()[name = string("op_1078_transpose_y_0"), val = bool(false)]; + tensor v_49_cast_fp16 = transpose(perm = var_1072, x = var_1071_cast_fp16)[name = string("transpose_84")]; + tensor var_1078_cast_fp16 = matmul(transpose_x = var_1078_transpose_x_0, transpose_y = var_1078_transpose_y_0, x = var_1076_cast_fp16, y = v_49_cast_fp16)[name = string("op_1078_cast_fp16")]; + tensor var_1079 = const()[name = string("op_1079"), val = tensor([0, 2, 1, 3])]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 512])]; + tensor var_1080_cast_fp16 = transpose(perm = var_1079, x = var_1078_cast_fp16)[name = string("transpose_81")]; + tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1080_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87568832)))]; + tensor var_1085_to_fp16 = const()[name = string("op_1085_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88093184)))]; + tensor linear_37_cast_fp16 = linear(bias = var_1085_to_fp16, weight = var_1084_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; + tensor var_1092_axes_0 = const()[name = string("op_1092_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88094272)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88095360)))]; + tensor var_1092_cast_fp16 = layer_norm(axes = var_1092_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_926_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1092_cast_fp16")]; + tensor var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88096448)))]; + tensor var_1102_to_fp16 = const()[name = string("op_1102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90193664)))]; + tensor linear_38_cast_fp16 = linear(bias = var_1102_to_fp16, weight = var_1101_to_fp16, x = var_1092_cast_fp16)[name = string("linear_38_cast_fp16")]; + string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; + tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_1107_to_fp16 = const()[name = string("op_1107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90197824)))]; + tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92295040)))]; + tensor linear_39_cast_fp16 = linear(bias = var_1108_to_fp16, weight = var_1107_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; + tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 512])]; + tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_20)[name = string("k_cache_21_cast_fp16")]; + tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 512])]; + tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_21)[name = string("v_cache_21_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([6, 1, 1500, 512])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([6, 1, 1500, 512])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_1131 = const()[name = string("op_1131"), val = int32(-1)]; + tensor var_1149_axes_0 = const()[name = string("op_1149_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92296128)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92297216)))]; + fp16 var_1137_to_fp16 = const()[name = string("op_1137_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1149_cast_fp16 = layer_norm(axes = var_1149_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1149_cast_fp16")]; + tensor var_1160_to_fp16 = const()[name = string("op_1160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92298304)))]; + tensor var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92822656)))]; + tensor linear_40_cast_fp16 = linear(bias = var_1161_to_fp16, weight = var_1160_to_fp16, x = var_1149_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor var_1164_to_fp16 = const()[name = string("op_1164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92823744)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1164_to_fp16, x = var_1149_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93348096)))]; + tensor var_1169_to_fp16 = const()[name = string("op_1169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93872448)))]; + tensor linear_42_cast_fp16 = linear(bias = var_1169_to_fp16, weight = var_1168_to_fp16, x = var_1149_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_1171_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1171_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1171_shape_cast_fp16_to_uint16 = cast(dtype = var_1171_shape_cast_fp16_to_uint16_dtype_0, x = var_1171_shape_cast_fp16)[name = string("cast_68")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1171_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_67")]; + int32 end_step = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step)[name = string("expand_dims_83")]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; + tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; + tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_20)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_22_write_state")]; + tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_21)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_23_write_state")]; + int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; + int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(512)]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step, concat_120_values2_0))[name = string("concat_120")]; + tensor var_1187_begin_0 = const()[name = string("op_1187_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1187_end_mask_0 = const()[name = string("op_1187_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1187_cast_fp16 = slice_by_index(begin = var_1187_begin_0, end = concat_120, end_mask = var_1187_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1187_cast_fp16")]; + tensor var_1190_begin_0 = const()[name = string("op_1190_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1190_end_mask_0 = const()[name = string("op_1190_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = concat_120, end_mask = var_1190_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1190_cast_fp16")]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 8, 64])]; + tensor var_1200_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1200_cast_fp16")]; + tensor const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1200_cast_fp16, y = const_50_to_fp16)[name = string("q_43_cast_fp16")]; + tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 8, 64])]; + tensor var_1207_cast_fp16 = reshape(shape = concat_123x, x = var_1187_cast_fp16)[name = string("op_1207_cast_fp16")]; + tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1207_cast_fp16, y = const_51_to_fp16)[name = string("k_55_cast_fp16")]; + tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 8, 64])]; + tensor var_1214_cast_fp16 = reshape(shape = concat_124x, x = var_1190_cast_fp16)[name = string("op_1214_cast_fp16")]; + tensor var_1215 = const()[name = string("op_1215"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = k_55_cast_fp16)[name = string("transpose_78")]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = q_43_cast_fp16)[name = string("transpose_79")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_69, y = transpose_70)[name = string("qk_31_cast_fp16")]; + int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; + tensor var_1218_begin_0 = const()[name = string("op_1218_begin_0"), val = tensor([0, 0])]; + tensor var_1218_end_mask_0 = const()[name = string("op_1218_end_mask_0"), val = tensor([false, true])]; + tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = concat_125, end_mask = var_1218_end_mask_0, x = mask_to_fp16)[name = string("op_1218_cast_fp16")]; + int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; + tensor var_1219_begin_0 = const()[name = string("op_1219_begin_0"), val = tensor([0, 0])]; + tensor var_1219_end_mask_0 = const()[name = string("op_1219_end_mask_0"), val = tensor([true, false])]; + tensor var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = concat_126, end_mask = var_1219_end_mask_0, x = var_1218_cast_fp16)[name = string("op_1219_cast_fp16")]; + tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1219_cast_fp16)[name = string("qk_33_cast_fp16")]; + tensor var_1222_cast_fp16 = softmax(axis = var_1131, x = qk_33_cast_fp16)[name = string("op_1222_cast_fp16")]; + bool var_1224_transpose_x_0 = const()[name = string("op_1224_transpose_x_0"), val = bool(false)]; + bool var_1224_transpose_y_0 = const()[name = string("op_1224_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1215, x = var_1214_cast_fp16)[name = string("transpose_80")]; + tensor var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = var_1222_cast_fp16, y = v_55_cast_fp16)[name = string("op_1224_cast_fp16")]; + tensor var_1225 = const()[name = string("op_1225"), val = tensor([0, 2, 1, 3])]; + tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 512])]; + tensor var_1226_cast_fp16 = transpose(perm = var_1225, x = var_1224_cast_fp16)[name = string("transpose_77")]; + tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1226_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1230_to_fp16 = const()[name = string("op_1230_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93873536)))]; + tensor var_1231_to_fp16 = const()[name = string("op_1231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94397888)))]; + tensor linear_43_cast_fp16 = linear(bias = var_1231_to_fp16, weight = var_1230_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_1238_axes_0 = const()[name = string("op_1238_axes_0"), val = tensor([-1])]; + tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94398976)))]; + tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94400064)))]; + tensor var_1238_cast_fp16 = layer_norm(axes = var_1238_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1238_cast_fp16")]; + tensor var_1247_to_fp16 = const()[name = string("op_1247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94401152)))]; + tensor var_1248_to_fp16 = const()[name = string("op_1248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94925504)))]; + tensor linear_44_cast_fp16 = linear(bias = var_1248_to_fp16, weight = var_1247_to_fp16, x = var_1238_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; + tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; + tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 8, 64])]; + tensor var_1268_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1268_cast_fp16")]; + tensor const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_1268_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")]; + tensor var_1274 = const()[name = string("op_1274"), val = tensor([1, 1500, 8, -1])]; + tensor var_1275_cast_fp16 = reshape(shape = var_1274, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1275_cast_fp16")]; + tensor const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_1275_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")]; + tensor var_1281 = const()[name = string("op_1281"), val = tensor([1, 1500, 8, -1])]; + tensor var_1282_cast_fp16 = reshape(shape = var_1281, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1282_cast_fp16")]; + tensor var_1283 = const()[name = string("op_1283"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = k_cast_fp16)[name = string("transpose_74")]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = q_cast_fp16)[name = string("transpose_75")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_71, y = transpose_72)[name = string("qk_cast_fp16")]; + tensor var_1287_cast_fp16 = softmax(axis = var_1131, x = qk_cast_fp16)[name = string("op_1287_cast_fp16")]; + bool var_1289_transpose_x_0 = const()[name = string("op_1289_transpose_x_0"), val = bool(false)]; + bool var_1289_transpose_y_0 = const()[name = string("op_1289_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_1283, x = var_1282_cast_fp16)[name = string("transpose_76")]; + tensor var_1289_cast_fp16 = matmul(transpose_x = var_1289_transpose_x_0, transpose_y = var_1289_transpose_y_0, x = var_1287_cast_fp16, y = v_cast_fp16)[name = string("op_1289_cast_fp16")]; + tensor var_1290 = const()[name = string("op_1290"), val = tensor([0, 2, 1, 3])]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 512])]; + tensor var_1291_cast_fp16 = transpose(perm = var_1290, x = var_1289_cast_fp16)[name = string("transpose_73")]; + tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1291_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_1295_to_fp16 = const()[name = string("op_1295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94926592)))]; + tensor var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95450944)))]; + tensor linear_45_cast_fp16 = linear(bias = var_1296_to_fp16, weight = var_1295_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95452032)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95453120)))]; + tensor var_1303_cast_fp16 = layer_norm(axes = var_1303_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1137_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1303_cast_fp16")]; + tensor var_1312_to_fp16 = const()[name = string("op_1312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95454208)))]; + tensor var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97551424)))]; + tensor linear_46_cast_fp16 = linear(bias = var_1313_to_fp16, weight = var_1312_to_fp16, x = var_1303_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; + tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97555584)))]; + tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99652800)))]; + tensor linear_47_cast_fp16 = linear(bias = var_1319_to_fp16, weight = var_1318_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; + tensor var_1332_axes_0 = const()[name = string("op_1332_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99653888)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99654976)))]; + fp16 var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1332_cast_fp16 = layer_norm(axes = var_1332_axes_0, beta = ln_bias_to_fp16, epsilon = var_1323_to_fp16, gamma = ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1332_cast_fp16")]; + tensor var_1342_bias_0_to_fp16 = const()[name = string("op_1342_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99656064)))]; + tensor logits = linear(bias = var_1342_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_1332_cast_fp16)[name = string("op_1342_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/base/decoder_second.mlmodelc/weights/weight.bin b/base/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..3153d1bbcd435e50a4cf80c99254e8a03ffbdc2f --- /dev/null +++ b/base/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94338bcd9475d6d8848699ee40dd6fac40d1e597c1e28d124454a7bf37bff672 +size 99759858 diff --git a/base/encoder.mlmodelc/analytics/coremldata.bin b/base/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2ae2d1f6d9f1fe93e00469b454364bdbe8e910c --- /dev/null +++ b/base/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405dc318443c493222a32916d66c5d908d7cc1d250f73e9a192d5b734a8494ed +size 243 diff --git a/base/encoder.mlmodelc/coremldata.bin b/base/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ca7832b786c7f159522ab760ae244ed35db998f --- /dev/null +++ b/base/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b04d5884b5a2d983f52a0e557aff2e7d3dff78b2a9f9d496a5280546bacfaff +size 318 diff --git a/base/encoder.mlmodelc/metadata.json b/base/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1ab391b2709a9fb2b09b1f7b6d89ca78857d25db --- /dev/null +++ b/base/encoder.mlmodelc/metadata.json @@ -0,0 +1,69 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 512)", + "shortDescription" : "", + "shape" : "[1, 1500, 512]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.mul" : 12, + "Ios18.softmax" : 6, + "Ios18.linear" : 36, + "Ios18.gelu" : 8, + "Ios18.layerNorm" : 13, + "Ios18.transpose" : 25, + "Ios18.matmul" : 12, + "Ios18.conv" : 2, + "Ios18.add" : 13, + "Ios18.reshape" : 24 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "encoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/base/encoder.mlmodelc/model.mil b/base/encoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..4dc0b2790a85a39113a2dfcbe5820fc579e08264 --- /dev/null +++ b/base/encoder.mlmodelc/model.mil @@ -0,0 +1,384 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor logmel_data) { + string var_32_pad_type_0 = const()[name = string("op_32_pad_type_0"), val = string("custom")]; + tensor var_32_pad_0 = const()[name = string("op_32_pad_0"), val = tensor([1, 1])]; + tensor var_32_strides_0 = const()[name = string("op_32_strides_0"), val = tensor([1])]; + tensor var_32_dilations_0 = const()[name = string("op_32_dilations_0"), val = tensor([1])]; + int32 var_32_groups_0 = const()[name = string("op_32_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245888)))]; + tensor var_32_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_32_dilations_0, groups = var_32_groups_0, pad = var_32_pad_0, pad_type = var_32_pad_type_0, strides = var_32_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_32_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_32_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_50_pad_type_0 = const()[name = string("op_50_pad_type_0"), val = string("custom")]; + tensor var_50_pad_0 = const()[name = string("op_50_pad_0"), val = tensor([1, 1])]; + tensor var_50_strides_0 = const()[name = string("op_50_strides_0"), val = tensor([2])]; + tensor var_50_dilations_0 = const()[name = string("op_50_dilations_0"), val = tensor([1])]; + int32 var_50_groups_0 = const()[name = string("op_50_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246976)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1819904)))]; + tensor var_50_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_50_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_50_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_56 = const()[name = string("op_56"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1820992)))]; + tensor x_5_cast_fp16 = transpose(perm = var_56, x = x_3_cast_fp16)[name = string("transpose_60")]; + tensor var_59_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_59_cast_fp16")]; + int32 var_72 = const()[name = string("op_72"), val = int32(-1)]; + tensor var_88_axes_0 = const()[name = string("op_88_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3357056)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3358144)))]; + fp16 var_78_to_fp16 = const()[name = string("op_78_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_88_cast_fp16 = layer_norm(axes = var_88_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_78_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_59_cast_fp16)[name = string("op_88_cast_fp16")]; + tensor var_99_to_fp16 = const()[name = string("op_99_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3359232)))]; + tensor var_100_to_fp16 = const()[name = string("op_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3883584)))]; + tensor linear_0_cast_fp16 = linear(bias = var_100_to_fp16, weight = var_99_to_fp16, x = var_88_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_103_to_fp16 = const()[name = string("op_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3884672)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4409024)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_103_to_fp16, x = var_88_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_107_to_fp16 = const()[name = string("op_107_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4410112)))]; + tensor var_108_to_fp16 = const()[name = string("op_108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4934464)))]; + tensor linear_2_cast_fp16 = linear(bias = var_108_to_fp16, weight = var_107_to_fp16, x = var_88_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_116 = const()[name = string("op_116"), val = tensor([1, 1500, 8, -1])]; + tensor var_117_cast_fp16 = reshape(shape = var_116, x = linear_0_cast_fp16)[name = string("op_117_cast_fp16")]; + tensor const_42_to_fp16 = const()[name = string("const_42_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_117_cast_fp16, y = const_42_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_123 = const()[name = string("op_123"), val = tensor([1, 1500, 8, -1])]; + tensor var_124_cast_fp16 = reshape(shape = var_123, x = linear_1_cast_fp16)[name = string("op_124_cast_fp16")]; + tensor const_43_to_fp16 = const()[name = string("const_43_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_124_cast_fp16, y = const_43_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_130 = const()[name = string("op_130"), val = tensor([1, 1500, 8, -1])]; + tensor var_131_cast_fp16 = reshape(shape = var_130, x = linear_2_cast_fp16)[name = string("op_131_cast_fp16")]; + tensor var_132 = const()[name = string("op_132"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_25 = transpose(perm = transpose_25_perm_0, x = k_3_cast_fp16)[name = string("transpose_57")]; + tensor transpose_24 = transpose(perm = transpose_24_perm_0, x = q_3_cast_fp16)[name = string("transpose_58")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_24, y = transpose_25)[name = string("qk_1_cast_fp16")]; + tensor var_136_cast_fp16 = softmax(axis = var_72, x = qk_1_cast_fp16)[name = string("op_136_cast_fp16")]; + bool var_138_transpose_x_0 = const()[name = string("op_138_transpose_x_0"), val = bool(false)]; + bool var_138_transpose_y_0 = const()[name = string("op_138_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_132, x = var_131_cast_fp16)[name = string("transpose_59")]; + tensor var_138_cast_fp16 = matmul(transpose_x = var_138_transpose_x_0, transpose_y = var_138_transpose_y_0, x = var_136_cast_fp16, y = v_3_cast_fp16)[name = string("op_138_cast_fp16")]; + tensor var_139 = const()[name = string("op_139"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 512])]; + tensor var_140_cast_fp16 = transpose(perm = var_139, x = var_138_cast_fp16)[name = string("transpose_56")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_140_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_144_to_fp16 = const()[name = string("op_144_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4935552)))]; + tensor var_145_to_fp16 = const()[name = string("op_145_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5459904)))]; + tensor linear_3_cast_fp16 = linear(bias = var_145_to_fp16, weight = var_144_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_59_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_152_axes_0 = const()[name = string("op_152_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5460992)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5462080)))]; + tensor var_152_cast_fp16 = layer_norm(axes = var_152_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_78_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_152_cast_fp16")]; + tensor var_161_to_fp16 = const()[name = string("op_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5463168)))]; + tensor var_162_to_fp16 = const()[name = string("op_162_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7560384)))]; + tensor linear_4_cast_fp16 = linear(bias = var_162_to_fp16, weight = var_161_to_fp16, x = var_152_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_167_to_fp16 = const()[name = string("op_167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7564544)))]; + tensor var_168_to_fp16 = const()[name = string("op_168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9661760)))]; + tensor linear_5_cast_fp16 = linear(bias = var_168_to_fp16, weight = var_167_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_178 = const()[name = string("op_178"), val = int32(-1)]; + tensor var_194_axes_0 = const()[name = string("op_194_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9662848)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9663936)))]; + fp16 var_184_to_fp16 = const()[name = string("op_184_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_194_cast_fp16 = layer_norm(axes = var_194_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_184_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_194_cast_fp16")]; + tensor var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9665024)))]; + tensor var_206_to_fp16 = const()[name = string("op_206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10189376)))]; + tensor linear_6_cast_fp16 = linear(bias = var_206_to_fp16, weight = var_205_to_fp16, x = var_194_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10190464)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_209_to_fp16, x = var_194_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10714816)))]; + tensor var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11239168)))]; + tensor linear_8_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_194_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_222 = const()[name = string("op_222"), val = tensor([1, 1500, 8, -1])]; + tensor var_223_cast_fp16 = reshape(shape = var_222, x = linear_6_cast_fp16)[name = string("op_223_cast_fp16")]; + tensor const_44_to_fp16 = const()[name = string("const_44_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_223_cast_fp16, y = const_44_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_229 = const()[name = string("op_229"), val = tensor([1, 1500, 8, -1])]; + tensor var_230_cast_fp16 = reshape(shape = var_229, x = linear_7_cast_fp16)[name = string("op_230_cast_fp16")]; + tensor const_45_to_fp16 = const()[name = string("const_45_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_230_cast_fp16, y = const_45_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_236 = const()[name = string("op_236"), val = tensor([1, 1500, 8, -1])]; + tensor var_237_cast_fp16 = reshape(shape = var_236, x = linear_8_cast_fp16)[name = string("op_237_cast_fp16")]; + tensor var_238 = const()[name = string("op_238"), val = tensor([0, 2, 1, 3])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_27_perm_0 = const()[name = string("transpose_27_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_27 = transpose(perm = transpose_27_perm_0, x = k_7_cast_fp16)[name = string("transpose_53")]; + tensor transpose_26 = transpose(perm = transpose_26_perm_0, x = q_7_cast_fp16)[name = string("transpose_54")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_26, y = transpose_27)[name = string("qk_3_cast_fp16")]; + tensor var_242_cast_fp16 = softmax(axis = var_178, x = qk_3_cast_fp16)[name = string("op_242_cast_fp16")]; + bool var_244_transpose_x_0 = const()[name = string("op_244_transpose_x_0"), val = bool(false)]; + bool var_244_transpose_y_0 = const()[name = string("op_244_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_238, x = var_237_cast_fp16)[name = string("transpose_55")]; + tensor var_244_cast_fp16 = matmul(transpose_x = var_244_transpose_x_0, transpose_y = var_244_transpose_y_0, x = var_242_cast_fp16, y = v_7_cast_fp16)[name = string("op_244_cast_fp16")]; + tensor var_245 = const()[name = string("op_245"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 512])]; + tensor var_246_cast_fp16 = transpose(perm = var_245, x = var_244_cast_fp16)[name = string("transpose_52")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_246_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_250_to_fp16 = const()[name = string("op_250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11240256)))]; + tensor var_251_to_fp16 = const()[name = string("op_251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11764608)))]; + tensor linear_9_cast_fp16 = linear(bias = var_251_to_fp16, weight = var_250_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_258_axes_0 = const()[name = string("op_258_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11765696)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11766784)))]; + tensor var_258_cast_fp16 = layer_norm(axes = var_258_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_184_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_258_cast_fp16")]; + tensor var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11767872)))]; + tensor var_268_to_fp16 = const()[name = string("op_268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13865088)))]; + tensor linear_10_cast_fp16 = linear(bias = var_268_to_fp16, weight = var_267_to_fp16, x = var_258_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_273_to_fp16 = const()[name = string("op_273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13869248)))]; + tensor var_274_to_fp16 = const()[name = string("op_274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15966464)))]; + tensor linear_11_cast_fp16 = linear(bias = var_274_to_fp16, weight = var_273_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_284 = const()[name = string("op_284"), val = int32(-1)]; + tensor var_300_axes_0 = const()[name = string("op_300_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15967552)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15968640)))]; + fp16 var_290_to_fp16 = const()[name = string("op_290_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_300_cast_fp16 = layer_norm(axes = var_300_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_290_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_300_cast_fp16")]; + tensor var_311_to_fp16 = const()[name = string("op_311_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15969728)))]; + tensor var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16494080)))]; + tensor linear_12_cast_fp16 = linear(bias = var_312_to_fp16, weight = var_311_to_fp16, x = var_300_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_315_to_fp16 = const()[name = string("op_315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16495168)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_315_to_fp16, x = var_300_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17019520)))]; + tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17543872)))]; + tensor linear_14_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_300_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_328 = const()[name = string("op_328"), val = tensor([1, 1500, 8, -1])]; + tensor var_329_cast_fp16 = reshape(shape = var_328, x = linear_12_cast_fp16)[name = string("op_329_cast_fp16")]; + tensor const_46_to_fp16 = const()[name = string("const_46_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_329_cast_fp16, y = const_46_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_335 = const()[name = string("op_335"), val = tensor([1, 1500, 8, -1])]; + tensor var_336_cast_fp16 = reshape(shape = var_335, x = linear_13_cast_fp16)[name = string("op_336_cast_fp16")]; + tensor const_47_to_fp16 = const()[name = string("const_47_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_336_cast_fp16, y = const_47_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_342 = const()[name = string("op_342"), val = tensor([1, 1500, 8, -1])]; + tensor var_343_cast_fp16 = reshape(shape = var_342, x = linear_14_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_344 = const()[name = string("op_344"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_28_perm_0 = const()[name = string("transpose_28_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_29 = transpose(perm = transpose_29_perm_0, x = k_11_cast_fp16)[name = string("transpose_49")]; + tensor transpose_28 = transpose(perm = transpose_28_perm_0, x = q_11_cast_fp16)[name = string("transpose_50")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_28, y = transpose_29)[name = string("qk_5_cast_fp16")]; + tensor var_348_cast_fp16 = softmax(axis = var_284, x = qk_5_cast_fp16)[name = string("op_348_cast_fp16")]; + bool var_350_transpose_x_0 = const()[name = string("op_350_transpose_x_0"), val = bool(false)]; + bool var_350_transpose_y_0 = const()[name = string("op_350_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_344, x = var_343_cast_fp16)[name = string("transpose_51")]; + tensor var_350_cast_fp16 = matmul(transpose_x = var_350_transpose_x_0, transpose_y = var_350_transpose_y_0, x = var_348_cast_fp16, y = v_11_cast_fp16)[name = string("op_350_cast_fp16")]; + tensor var_351 = const()[name = string("op_351"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 512])]; + tensor var_352_cast_fp16 = transpose(perm = var_351, x = var_350_cast_fp16)[name = string("transpose_48")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_352_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_356_to_fp16 = const()[name = string("op_356_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17544960)))]; + tensor var_357_to_fp16 = const()[name = string("op_357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18069312)))]; + tensor linear_15_cast_fp16 = linear(bias = var_357_to_fp16, weight = var_356_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_364_axes_0 = const()[name = string("op_364_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18070400)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18071488)))]; + tensor var_364_cast_fp16 = layer_norm(axes = var_364_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_290_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_364_cast_fp16")]; + tensor var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18072576)))]; + tensor var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20169792)))]; + tensor linear_16_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = var_364_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_379_to_fp16 = const()[name = string("op_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20173952)))]; + tensor var_380_to_fp16 = const()[name = string("op_380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22271168)))]; + tensor linear_17_cast_fp16 = linear(bias = var_380_to_fp16, weight = var_379_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_390 = const()[name = string("op_390"), val = int32(-1)]; + tensor var_406_axes_0 = const()[name = string("op_406_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22272256)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22273344)))]; + fp16 var_396_to_fp16 = const()[name = string("op_396_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_406_cast_fp16 = layer_norm(axes = var_406_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_396_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_406_cast_fp16")]; + tensor var_417_to_fp16 = const()[name = string("op_417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22274432)))]; + tensor var_418_to_fp16 = const()[name = string("op_418_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22798784)))]; + tensor linear_18_cast_fp16 = linear(bias = var_418_to_fp16, weight = var_417_to_fp16, x = var_406_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22799872)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_421_to_fp16, x = var_406_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23324224)))]; + tensor var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23848576)))]; + tensor linear_20_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_406_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_434 = const()[name = string("op_434"), val = tensor([1, 1500, 8, -1])]; + tensor var_435_cast_fp16 = reshape(shape = var_434, x = linear_18_cast_fp16)[name = string("op_435_cast_fp16")]; + tensor const_48_to_fp16 = const()[name = string("const_48_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_435_cast_fp16, y = const_48_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_441 = const()[name = string("op_441"), val = tensor([1, 1500, 8, -1])]; + tensor var_442_cast_fp16 = reshape(shape = var_441, x = linear_19_cast_fp16)[name = string("op_442_cast_fp16")]; + tensor const_49_to_fp16 = const()[name = string("const_49_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_442_cast_fp16, y = const_49_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_448 = const()[name = string("op_448"), val = tensor([1, 1500, 8, -1])]; + tensor var_449_cast_fp16 = reshape(shape = var_448, x = linear_20_cast_fp16)[name = string("op_449_cast_fp16")]; + tensor var_450 = const()[name = string("op_450"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_30_perm_0 = const()[name = string("transpose_30_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_31_perm_0 = const()[name = string("transpose_31_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_31 = transpose(perm = transpose_31_perm_0, x = k_15_cast_fp16)[name = string("transpose_45")]; + tensor transpose_30 = transpose(perm = transpose_30_perm_0, x = q_15_cast_fp16)[name = string("transpose_46")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_30, y = transpose_31)[name = string("qk_7_cast_fp16")]; + tensor var_454_cast_fp16 = softmax(axis = var_390, x = qk_7_cast_fp16)[name = string("op_454_cast_fp16")]; + bool var_456_transpose_x_0 = const()[name = string("op_456_transpose_x_0"), val = bool(false)]; + bool var_456_transpose_y_0 = const()[name = string("op_456_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_450, x = var_449_cast_fp16)[name = string("transpose_47")]; + tensor var_456_cast_fp16 = matmul(transpose_x = var_456_transpose_x_0, transpose_y = var_456_transpose_y_0, x = var_454_cast_fp16, y = v_15_cast_fp16)[name = string("op_456_cast_fp16")]; + tensor var_457 = const()[name = string("op_457"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 512])]; + tensor var_458_cast_fp16 = transpose(perm = var_457, x = var_456_cast_fp16)[name = string("transpose_44")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_458_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_462_to_fp16 = const()[name = string("op_462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23849664)))]; + tensor var_463_to_fp16 = const()[name = string("op_463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24374016)))]; + tensor linear_21_cast_fp16 = linear(bias = var_463_to_fp16, weight = var_462_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_470_axes_0 = const()[name = string("op_470_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24375104)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24376192)))]; + tensor var_470_cast_fp16 = layer_norm(axes = var_470_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_396_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_470_cast_fp16")]; + tensor var_479_to_fp16 = const()[name = string("op_479_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24377280)))]; + tensor var_480_to_fp16 = const()[name = string("op_480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26474496)))]; + tensor linear_22_cast_fp16 = linear(bias = var_480_to_fp16, weight = var_479_to_fp16, x = var_470_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_485_to_fp16 = const()[name = string("op_485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26478656)))]; + tensor var_486_to_fp16 = const()[name = string("op_486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28575872)))]; + tensor linear_23_cast_fp16 = linear(bias = var_486_to_fp16, weight = var_485_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")]; + int32 var_496 = const()[name = string("op_496"), val = int32(-1)]; + tensor var_512_axes_0 = const()[name = string("op_512_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28576960)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28578048)))]; + fp16 var_502_to_fp16 = const()[name = string("op_502_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_512_cast_fp16 = layer_norm(axes = var_512_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_502_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_512_cast_fp16")]; + tensor var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28579136)))]; + tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29103488)))]; + tensor linear_24_cast_fp16 = linear(bias = var_524_to_fp16, weight = var_523_to_fp16, x = var_512_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29104576)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_527_to_fp16, x = var_512_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29628928)))]; + tensor var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30153280)))]; + tensor linear_26_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_512_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_540 = const()[name = string("op_540"), val = tensor([1, 1500, 8, -1])]; + tensor var_541_cast_fp16 = reshape(shape = var_540, x = linear_24_cast_fp16)[name = string("op_541_cast_fp16")]; + tensor const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_541_cast_fp16, y = const_50_to_fp16)[name = string("q_19_cast_fp16")]; + tensor var_547 = const()[name = string("op_547"), val = tensor([1, 1500, 8, -1])]; + tensor var_548_cast_fp16 = reshape(shape = var_547, x = linear_25_cast_fp16)[name = string("op_548_cast_fp16")]; + tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_548_cast_fp16, y = const_51_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_554 = const()[name = string("op_554"), val = tensor([1, 1500, 8, -1])]; + tensor var_555_cast_fp16 = reshape(shape = var_554, x = linear_26_cast_fp16)[name = string("op_555_cast_fp16")]; + tensor var_556 = const()[name = string("op_556"), val = tensor([0, 2, 1, 3])]; + bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)]; + bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)]; + tensor transpose_32_perm_0 = const()[name = string("transpose_32_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_33 = transpose(perm = transpose_33_perm_0, x = k_19_cast_fp16)[name = string("transpose_41")]; + tensor transpose_32 = transpose(perm = transpose_32_perm_0, x = q_19_cast_fp16)[name = string("transpose_42")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_32, y = transpose_33)[name = string("qk_9_cast_fp16")]; + tensor var_560_cast_fp16 = softmax(axis = var_496, x = qk_9_cast_fp16)[name = string("op_560_cast_fp16")]; + bool var_562_transpose_x_0 = const()[name = string("op_562_transpose_x_0"), val = bool(false)]; + bool var_562_transpose_y_0 = const()[name = string("op_562_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_556, x = var_555_cast_fp16)[name = string("transpose_43")]; + tensor var_562_cast_fp16 = matmul(transpose_x = var_562_transpose_x_0, transpose_y = var_562_transpose_y_0, x = var_560_cast_fp16, y = v_19_cast_fp16)[name = string("op_562_cast_fp16")]; + tensor var_563 = const()[name = string("op_563"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 1500, 512])]; + tensor var_564_cast_fp16 = transpose(perm = var_563, x = var_562_cast_fp16)[name = string("transpose_40")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = var_564_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = string("op_568_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30154368)))]; + tensor var_569_to_fp16 = const()[name = string("op_569_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30678720)))]; + tensor linear_27_cast_fp16 = linear(bias = var_569_to_fp16, weight = var_568_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_576_axes_0 = const()[name = string("op_576_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30679808)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30680896)))]; + tensor var_576_cast_fp16 = layer_norm(axes = var_576_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_502_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_576_cast_fp16")]; + tensor var_585_to_fp16 = const()[name = string("op_585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30681984)))]; + tensor var_586_to_fp16 = const()[name = string("op_586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32779200)))]; + tensor linear_28_cast_fp16 = linear(bias = var_586_to_fp16, weight = var_585_to_fp16, x = var_576_cast_fp16)[name = string("linear_28_cast_fp16")]; + string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")]; + tensor var_591_to_fp16 = const()[name = string("op_591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32783360)))]; + tensor var_592_to_fp16 = const()[name = string("op_592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34880576)))]; + tensor linear_29_cast_fp16 = linear(bias = var_592_to_fp16, weight = var_591_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")]; + int32 var_602 = const()[name = string("op_602"), val = int32(-1)]; + tensor var_618_axes_0 = const()[name = string("op_618_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34881664)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34882752)))]; + fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_618_cast_fp16 = layer_norm(axes = var_618_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_618_cast_fp16")]; + tensor var_629_to_fp16 = const()[name = string("op_629_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34883840)))]; + tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35408192)))]; + tensor linear_30_cast_fp16 = linear(bias = var_630_to_fp16, weight = var_629_to_fp16, x = var_618_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor var_633_to_fp16 = const()[name = string("op_633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35409280)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_633_to_fp16, x = var_618_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35933632)))]; + tensor var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36457984)))]; + tensor linear_32_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_618_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_646 = const()[name = string("op_646"), val = tensor([1, 1500, 8, -1])]; + tensor var_647_cast_fp16 = reshape(shape = var_646, x = linear_30_cast_fp16)[name = string("op_647_cast_fp16")]; + tensor const_52_to_fp16 = const()[name = string("const_52_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_647_cast_fp16, y = const_52_to_fp16)[name = string("q_cast_fp16")]; + tensor var_653 = const()[name = string("op_653"), val = tensor([1, 1500, 8, -1])]; + tensor var_654_cast_fp16 = reshape(shape = var_653, x = linear_31_cast_fp16)[name = string("op_654_cast_fp16")]; + tensor const_53_to_fp16 = const()[name = string("const_53_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_654_cast_fp16, y = const_53_to_fp16)[name = string("k_cast_fp16")]; + tensor var_660 = const()[name = string("op_660"), val = tensor([1, 1500, 8, -1])]; + tensor var_661_cast_fp16 = reshape(shape = var_660, x = linear_32_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor var_662 = const()[name = string("op_662"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_35 = transpose(perm = transpose_35_perm_0, x = k_cast_fp16)[name = string("transpose_37")]; + tensor transpose_34 = transpose(perm = transpose_34_perm_0, x = q_cast_fp16)[name = string("transpose_38")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_34, y = transpose_35)[name = string("qk_cast_fp16")]; + tensor var_666_cast_fp16 = softmax(axis = var_602, x = qk_cast_fp16)[name = string("op_666_cast_fp16")]; + bool var_668_transpose_x_0 = const()[name = string("op_668_transpose_x_0"), val = bool(false)]; + bool var_668_transpose_y_0 = const()[name = string("op_668_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_662, x = var_661_cast_fp16)[name = string("transpose_39")]; + tensor var_668_cast_fp16 = matmul(transpose_x = var_668_transpose_x_0, transpose_y = var_668_transpose_y_0, x = var_666_cast_fp16, y = v_cast_fp16)[name = string("op_668_cast_fp16")]; + tensor var_669 = const()[name = string("op_669"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([1, 1500, 512])]; + tensor var_670_cast_fp16 = transpose(perm = var_669, x = var_668_cast_fp16)[name = string("transpose_36")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = var_670_cast_fp16)[name = string("x_71_cast_fp16")]; + tensor var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36459072)))]; + tensor var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36983424)))]; + tensor linear_33_cast_fp16 = linear(bias = var_675_to_fp16, weight = var_674_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_682_axes_0 = const()[name = string("op_682_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36984512)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36985600)))]; + tensor var_682_cast_fp16 = layer_norm(axes = var_682_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_682_cast_fp16")]; + tensor var_691_to_fp16 = const()[name = string("op_691_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36986688)))]; + tensor var_692_to_fp16 = const()[name = string("op_692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39083904)))]; + tensor linear_34_cast_fp16 = linear(bias = var_692_to_fp16, weight = var_691_to_fp16, x = var_682_cast_fp16)[name = string("linear_34_cast_fp16")]; + string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")]; + tensor var_697_to_fp16 = const()[name = string("op_697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39088064)))]; + tensor var_698_to_fp16 = const()[name = string("op_698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41185280)))]; + tensor linear_35_cast_fp16 = linear(bias = var_698_to_fp16, weight = var_697_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_711_axes_0 = const()[name = string("op_711_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41186368)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41187456)))]; + fp16 var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_711_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_702_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_711_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/base/encoder.mlmodelc/weights/weight.bin b/base/encoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d9f2ba5d1a4cbde8e0e1b8f185ff5b7be0ce74d --- /dev/null +++ b/base/encoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45bee989219532c4cec616d439c51f280ac9d7b04f7847c4b7d7daba1d47523 +size 41188544 diff --git a/base/model_dims.json b/base/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..749969fc8563817aae59f05daa17e1a062f383a4 --- /dev/null +++ b/base/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 80, + "n_audio_ctx": 1500, + "n_audio_state": 512, + "n_audio_head": 8, + "n_audio_layer": 6, + "n_vocab": 51865, + "n_text_ctx": 448, + "n_text_state": 512, + "n_text_head": 8, + "n_text_layer": 6 +} \ No newline at end of file diff --git a/compile_model.sh b/compile_model.sh new file mode 100755 index 0000000000000000000000000000000000000000..8b92a248c93902fa29db67e23daa719b5bdb433b --- /dev/null +++ b/compile_model.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +for d in work/* +do + echo $d + pushd $d >/dev/null + + if [ -d encoder ]; then + xcrun coremlcompiler compile encoder/chunked_pipeline.mlpackage . + rm -rf encoder.mlmodelc + mv chunked_pipeline.mlmodelc encoder.mlmodelc + else + xcrun coremlcompiler compile encoder.mlpackage . + fi + xcrun coremlcompiler compile decoder_first.mlpackage . + xcrun coremlcompiler compile decoder_second.mlpackage . + + popd >/dev/null +done + +mkdir -p output +for d in work/* +do + out=${d/work/output} + mkdir -p $out + mv $d/*.mlmodelc $d/model_dims.json $out/ +done + +mkdir -p index +for d in output/* +do + model=${d##*/} + (cd $d && find * -type f) > index/$model +done \ No newline at end of file diff --git a/index/base b/index/base new file mode 100644 index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519 --- /dev/null +++ b/index/base @@ -0,0 +1,16 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/weights/weight.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model.mil +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/index/large-v2 b/index/large-v2 new file mode 100644 index 0000000000000000000000000000000000000000..e9441a23015009aced8ebd0cb6b7d41f2f6318bb --- /dev/null +++ b/index/large-v2 @@ -0,0 +1,22 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model0/weights/0-weight.bin +encoder.mlmodelc/model0/model.mil +encoder.mlmodelc/model0/coremldata.bin +encoder.mlmodelc/model0/analytics/coremldata.bin +encoder.mlmodelc/model1/weights/1-weight.bin +encoder.mlmodelc/model1/model.mil +encoder.mlmodelc/model1/coremldata.bin +encoder.mlmodelc/model1/analytics/coremldata.bin +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/index/large-v3 b/index/large-v3 new file mode 100644 index 0000000000000000000000000000000000000000..e9441a23015009aced8ebd0cb6b7d41f2f6318bb --- /dev/null +++ b/index/large-v3 @@ -0,0 +1,22 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model0/weights/0-weight.bin +encoder.mlmodelc/model0/model.mil +encoder.mlmodelc/model0/coremldata.bin +encoder.mlmodelc/model0/analytics/coremldata.bin +encoder.mlmodelc/model1/weights/1-weight.bin +encoder.mlmodelc/model1/model.mil +encoder.mlmodelc/model1/coremldata.bin +encoder.mlmodelc/model1/analytics/coremldata.bin +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/index/medium b/index/medium new file mode 100644 index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519 --- /dev/null +++ b/index/medium @@ -0,0 +1,16 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/weights/weight.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model.mil +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/index/small b/index/small new file mode 100644 index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519 --- /dev/null +++ b/index/small @@ -0,0 +1,16 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/weights/weight.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model.mil +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/index/tiny b/index/tiny new file mode 100644 index 0000000000000000000000000000000000000000..8d76ac58164add489901c566cff97bec4d955519 --- /dev/null +++ b/index/tiny @@ -0,0 +1,16 @@ +decoder_first.mlmodelc/weights/weight.bin +decoder_first.mlmodelc/metadata.json +decoder_first.mlmodelc/model.mil +decoder_first.mlmodelc/coremldata.bin +decoder_first.mlmodelc/analytics/coremldata.bin +decoder_second.mlmodelc/weights/weight.bin +decoder_second.mlmodelc/metadata.json +decoder_second.mlmodelc/model.mil +decoder_second.mlmodelc/coremldata.bin +decoder_second.mlmodelc/analytics/coremldata.bin +encoder.mlmodelc/weights/weight.bin +encoder.mlmodelc/metadata.json +encoder.mlmodelc/model.mil +encoder.mlmodelc/coremldata.bin +encoder.mlmodelc/analytics/coremldata.bin +model_dims.json diff --git a/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin b/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3502c1971106c8ddba15a6d19cbe212e9040b51 --- /dev/null +++ b/large-v2/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a564dfd67cfcb3c0ee8cd9f7ef9f303fbfc561e635709bd3a46c5870571079de +size 243 diff --git a/large-v2/decoder_first.mlmodelc/coremldata.bin b/large-v2/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..78fa71791f49b098c63687ec844348e5cd25cd92 --- /dev/null +++ b/large-v2/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6214be9e110a102836fb1fdb960a2fb564e60f5d9e3d1e25a9b7f978309480e +size 453 diff --git a/large-v2/decoder_first.mlmodelc/metadata.json b/large-v2/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..62548d3742d04f712f1bad76294f859bb5029d22 --- /dev/null +++ b/large-v2/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 66, + "Shape" : 64, + "Ios18.linear" : 64, + "Identity" : 1, + "Ios18.gather" : 64, + "Ios18.concat" : 64, + "Ios18.sliceUpdate" : 66, + "Ios18.cast" : 128, + "Ios18.expandDims" : 64, + "Ios18.readState" : 66 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 1280", + "shapeRange" : "[[1, 1], [1, 1500], [1280, 1280]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 1280)", + "type" : "MultiArray", + "shape" : "[1, 1, 1280]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v2/decoder_first.mlmodelc/model.mil b/large-v2/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..5e9505ec80acb3d396de560006ff76f4da79cc6a --- /dev/null +++ b/large-v2/decoder_first.mlmodelc/model.mil @@ -0,0 +1,1851 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1280]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1280, 1280]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_131_to_fp16 = const()[name = string("op_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36700288)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39977152)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_131_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39979776)))]; + tensor var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43256640)))]; + tensor linear_1_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_138_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_138_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_138_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_138_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_138_shape_cast_fp16_to_int16 = cast(dtype = var_138_shape_cast_fp16_to_int16_dtype_0, x = var_138_shape_cast_fp16)[name = string("cast_199")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_138_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_198")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")]; + tensor var_143_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_143_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_143_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_143_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_143_shape_cast_fp16_to_uint16 = cast(dtype = var_143_shape_cast_fp16_to_uint16_dtype_0, x = var_143_shape_cast_fp16)[name = string("cast_197")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_143_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_196")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")]; + tensor var_165_to_fp16 = const()[name = string("op_165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43259264)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_165_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_169_to_fp16 = const()[name = string("op_169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46536128)))]; + tensor var_170_to_fp16 = const()[name = string("op_170_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49812992)))]; + tensor linear_3_cast_fp16 = linear(bias = var_170_to_fp16, weight = var_169_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_172_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_172_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_172_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_172_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_172_shape_cast_fp16_to_uint16 = cast(dtype = var_172_shape_cast_fp16_to_uint16_dtype_0, x = var_172_shape_cast_fp16)[name = string("cast_195")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_172_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_194")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")]; + tensor var_177_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_177_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_177_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_177_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_177_shape_cast_fp16_to_uint16 = cast(dtype = var_177_shape_cast_fp16_to_uint16_dtype_0, x = var_177_shape_cast_fp16)[name = string("cast_193")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_177_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_192")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")]; + tensor var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49815616)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_199_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53092480)))]; + tensor var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56369344)))]; + tensor linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_206_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_206_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_206_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_206_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_206_shape_cast_fp16_to_uint16 = cast(dtype = var_206_shape_cast_fp16_to_uint16_dtype_0, x = var_206_shape_cast_fp16)[name = string("cast_191")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_206_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_190")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")]; + tensor var_211_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_211_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_211_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_211_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_211_shape_cast_fp16_to_uint16 = cast(dtype = var_211_shape_cast_fp16_to_uint16_dtype_0, x = var_211_shape_cast_fp16)[name = string("cast_189")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_211_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_188")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")]; + tensor var_233_to_fp16 = const()[name = string("op_233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56371968)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_233_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59648832)))]; + tensor var_238_to_fp16 = const()[name = string("op_238_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62925696)))]; + tensor linear_7_cast_fp16 = linear(bias = var_238_to_fp16, weight = var_237_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_240_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_240_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_240_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_240_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_240_shape_cast_fp16_to_uint16 = cast(dtype = var_240_shape_cast_fp16_to_uint16_dtype_0, x = var_240_shape_cast_fp16)[name = string("cast_187")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_240_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_186")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")]; + tensor var_245_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_245_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_245_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_245_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_245_shape_cast_fp16_to_uint16 = cast(dtype = var_245_shape_cast_fp16_to_uint16_dtype_0, x = var_245_shape_cast_fp16)[name = string("cast_185")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_245_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_184")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")]; + tensor var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62928320)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_267_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")]; + tensor var_271_to_fp16 = const()[name = string("op_271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66205184)))]; + tensor var_272_to_fp16 = const()[name = string("op_272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69482048)))]; + tensor linear_9_cast_fp16 = linear(bias = var_272_to_fp16, weight = var_271_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")]; + tensor var_274_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_274_shape_cast_fp16")]; + int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)]; + int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)]; + bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)]; + string var_274_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_274_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)]; + tensor var_274_shape_cast_fp16_to_uint16 = cast(dtype = var_274_shape_cast_fp16_to_uint16_dtype_0, x = var_274_shape_cast_fp16)[name = string("cast_183")]; + uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_274_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")]; + string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor([0])]; + int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_182")]; + tensor expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([4, 0, 0, 0])]; + tensor concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor([0])]; + tensor concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor([0])]; + tensor concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor([0])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")]; + tensor k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")]; + tensor var_279_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_279_shape_cast_fp16")]; + int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)]; + int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)]; + bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)]; + string var_279_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_279_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)]; + tensor var_279_shape_cast_fp16_to_uint16 = cast(dtype = var_279_shape_cast_fp16_to_uint16_dtype_0, x = var_279_shape_cast_fp16)[name = string("cast_181")]; + uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_279_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")]; + string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor([0])]; + int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_180")]; + tensor expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; + tensor concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor([0])]; + tensor concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor([0])]; + tensor concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor([0])]; + int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; + bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; + tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")]; + tensor v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")]; + tensor var_301_to_fp16 = const()[name = string("op_301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69484672)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_301_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")]; + tensor var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72761536)))]; + tensor var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76038400)))]; + tensor linear_11_cast_fp16 = linear(bias = var_306_to_fp16, weight = var_305_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")]; + tensor var_308_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_308_shape_cast_fp16")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_308_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_308_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)]; + tensor var_308_shape_cast_fp16_to_uint16 = cast(dtype = var_308_shape_cast_fp16_to_uint16_dtype_0, x = var_308_shape_cast_fp16)[name = string("cast_179")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_308_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_178")]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([5, 0, 0, 0])]; + tensor concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor([0])]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")]; + tensor k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")]; + tensor var_313_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_313_shape_cast_fp16")]; + int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)]; + int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)]; + bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)]; + string var_313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)]; + tensor var_313_shape_cast_fp16_to_uint16 = cast(dtype = var_313_shape_cast_fp16_to_uint16_dtype_0, x = var_313_shape_cast_fp16)[name = string("cast_177")]; + uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_313_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")]; + string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor([0])]; + int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_176")]; + tensor expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([5, 0, 0, 0])]; + tensor concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor([0])]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")]; + tensor v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")]; + tensor var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76041024)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_335_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")]; + tensor var_339_to_fp16 = const()[name = string("op_339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79317888)))]; + tensor var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82594752)))]; + tensor linear_13_cast_fp16 = linear(bias = var_340_to_fp16, weight = var_339_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")]; + tensor var_342_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_342_shape_cast_fp16")]; + int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)]; + int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)]; + bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)]; + string var_342_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_342_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)]; + tensor var_342_shape_cast_fp16_to_uint16 = cast(dtype = var_342_shape_cast_fp16_to_uint16_dtype_0, x = var_342_shape_cast_fp16)[name = string("cast_175")]; + uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_342_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")]; + string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor([0])]; + int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_174")]; + tensor expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([6, 0, 0, 0])]; + tensor concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor([0])]; + tensor concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor([0])]; + tensor concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor([0])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")]; + tensor k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")]; + tensor var_347_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_347_shape_cast_fp16")]; + int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)]; + int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)]; + bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)]; + string var_347_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_347_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)]; + tensor var_347_shape_cast_fp16_to_uint16 = cast(dtype = var_347_shape_cast_fp16_to_uint16_dtype_0, x = var_347_shape_cast_fp16)[name = string("cast_173")]; + uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_347_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")]; + string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor([0])]; + int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_172")]; + tensor expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([6, 0, 0, 0])]; + tensor concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor([0])]; + tensor concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor([0])]; + tensor concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor([0])]; + int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)]; + bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)]; + tensor concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")]; + tensor v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")]; + tensor var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82597376)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_369_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")]; + tensor var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85874240)))]; + tensor var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89151104)))]; + tensor linear_15_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")]; + tensor var_376_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_376_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_376_shape_cast_fp16_to_uint16 = cast(dtype = var_376_shape_cast_fp16_to_uint16_dtype_0, x = var_376_shape_cast_fp16)[name = string("cast_171")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_376_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_170")]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([7, 0, 0, 0])]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([0])]; + tensor concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor([0])]; + tensor concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor([0])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")]; + tensor k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")]; + tensor var_381_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_381_shape_cast_fp16")]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_381_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_381_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)]; + tensor var_381_shape_cast_fp16_to_uint16 = cast(dtype = var_381_shape_cast_fp16_to_uint16_dtype_0, x = var_381_shape_cast_fp16)[name = string("cast_169")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_381_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor([0])]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_168")]; + tensor expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([7, 0, 0, 0])]; + tensor concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor([0])]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")]; + tensor v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")]; + tensor var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89153728)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_403_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")]; + tensor var_407_to_fp16 = const()[name = string("op_407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92430592)))]; + tensor var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95707456)))]; + tensor linear_17_cast_fp16 = linear(bias = var_408_to_fp16, weight = var_407_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")]; + tensor var_410_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_410_shape_cast_fp16")]; + int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)]; + int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)]; + bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)]; + string var_410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)]; + tensor var_410_shape_cast_fp16_to_uint16 = cast(dtype = var_410_shape_cast_fp16_to_uint16_dtype_0, x = var_410_shape_cast_fp16)[name = string("cast_167")]; + uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_410_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")]; + string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor([0])]; + int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_166")]; + tensor expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([8, 0, 0, 0])]; + tensor concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor([0])]; + tensor concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor([0])]; + tensor concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor([0])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")]; + tensor k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")]; + tensor var_415_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_415_shape_cast_fp16")]; + int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)]; + int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)]; + bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)]; + string var_415_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_415_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)]; + tensor var_415_shape_cast_fp16_to_uint16 = cast(dtype = var_415_shape_cast_fp16_to_uint16_dtype_0, x = var_415_shape_cast_fp16)[name = string("cast_165")]; + uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_415_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")]; + string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor([0])]; + int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_164")]; + tensor expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([8, 0, 0, 0])]; + tensor concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor([0])]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")]; + tensor v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")]; + tensor var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95710080)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_437_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")]; + tensor var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98986944)))]; + tensor var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102263808)))]; + tensor linear_19_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")]; + tensor var_444_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_444_shape_cast_fp16")]; + int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)]; + int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)]; + bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)]; + string var_444_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_444_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)]; + tensor var_444_shape_cast_fp16_to_uint16 = cast(dtype = var_444_shape_cast_fp16_to_uint16_dtype_0, x = var_444_shape_cast_fp16)[name = string("cast_163")]; + uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_444_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")]; + string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_162")]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([9, 0, 0, 0])]; + tensor concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor([0])]; + tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; + tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")]; + tensor k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")]; + tensor var_449_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_449_shape_cast_fp16")]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_449_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_449_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)]; + tensor var_449_shape_cast_fp16_to_uint16 = cast(dtype = var_449_shape_cast_fp16_to_uint16_dtype_0, x = var_449_shape_cast_fp16)[name = string("cast_161")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_449_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor([0])]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_160")]; + tensor expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([9, 0, 0, 0])]; + tensor concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor([0])]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")]; + tensor v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")]; + tensor var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102266432)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_471_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105543296)))]; + tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108820160)))]; + tensor linear_21_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")]; + tensor var_478_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_478_shape_cast_fp16")]; + int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)]; + int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)]; + bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)]; + string var_478_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_478_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)]; + tensor var_478_shape_cast_fp16_to_uint16 = cast(dtype = var_478_shape_cast_fp16_to_uint16_dtype_0, x = var_478_shape_cast_fp16)[name = string("cast_159")]; + uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_478_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")]; + string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor([0])]; + int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_158")]; + tensor expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([10, 0, 0, 0])]; + tensor concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor([0])]; + tensor concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor([0])]; + tensor concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor([0])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")]; + tensor k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")]; + tensor var_483_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_483_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_483_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_483_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)]; + tensor var_483_shape_cast_fp16_to_uint16 = cast(dtype = var_483_shape_cast_fp16_to_uint16_dtype_0, x = var_483_shape_cast_fp16)[name = string("cast_157")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_483_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor([0])]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_156")]; + tensor expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")]; + tensor concat_68 = const()[name = string("concat_68"), val = tensor([10, 0, 0, 0])]; + tensor concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor([0])]; + tensor concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor([0])]; + tensor concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor([0])]; + int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)]; + bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)]; + tensor concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")]; + tensor v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")]; + tensor var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108822784)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_505_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")]; + tensor var_509_to_fp16 = const()[name = string("op_509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112099648)))]; + tensor var_510_to_fp16 = const()[name = string("op_510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115376512)))]; + tensor linear_23_cast_fp16 = linear(bias = var_510_to_fp16, weight = var_509_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")]; + tensor var_512_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_512_shape_cast_fp16")]; + int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)]; + int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)]; + bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)]; + string var_512_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_512_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)]; + tensor var_512_shape_cast_fp16_to_uint16 = cast(dtype = var_512_shape_cast_fp16_to_uint16_dtype_0, x = var_512_shape_cast_fp16)[name = string("cast_155")]; + uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_512_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")]; + string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_154")]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([11, 0, 0, 0])]; + tensor concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor([0])]; + tensor concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor([0])]; + tensor concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor([0])]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")]; + tensor k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")]; + tensor var_517_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_517_shape_cast_fp16")]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_517_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_517_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_517_shape_cast_fp16_to_uint16 = cast(dtype = var_517_shape_cast_fp16_to_uint16_dtype_0, x = var_517_shape_cast_fp16)[name = string("cast_153")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_517_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor([0])]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_152")]; + tensor expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")]; + tensor concat_74 = const()[name = string("concat_74"), val = tensor([11, 0, 0, 0])]; + tensor concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor([0])]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")]; + tensor v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")]; + tensor var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115379136)))]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_539_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")]; + tensor var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118656000)))]; + tensor var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121932864)))]; + tensor linear_25_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")]; + tensor var_546_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_546_shape_cast_fp16")]; + int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)]; + int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)]; + bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)]; + string var_546_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_546_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)]; + tensor var_546_shape_cast_fp16_to_uint16 = cast(dtype = var_546_shape_cast_fp16_to_uint16_dtype_0, x = var_546_shape_cast_fp16)[name = string("cast_151")]; + uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_546_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")]; + string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor([0])]; + int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_150")]; + tensor expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")]; + tensor concat_77 = const()[name = string("concat_77"), val = tensor([12, 0, 0, 0])]; + tensor concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor([0])]; + tensor concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor([0])]; + tensor concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor([0])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")]; + tensor k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")]; + tensor var_551_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_551_shape_cast_fp16")]; + int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)]; + int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)]; + bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)]; + string var_551_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_551_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)]; + tensor var_551_shape_cast_fp16_to_uint16 = cast(dtype = var_551_shape_cast_fp16_to_uint16_dtype_0, x = var_551_shape_cast_fp16)[name = string("cast_149")]; + uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_551_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")]; + string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor([0])]; + int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_148")]; + tensor expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")]; + tensor concat_80 = const()[name = string("concat_80"), val = tensor([12, 0, 0, 0])]; + tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([0])]; + tensor concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor([0])]; + tensor concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor([0])]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")]; + tensor v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")]; + tensor var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121935488)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_573_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")]; + tensor var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125212352)))]; + tensor var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128489216)))]; + tensor linear_27_cast_fp16 = linear(bias = var_578_to_fp16, weight = var_577_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")]; + tensor var_580_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_580_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_580_shape_cast_fp16_to_uint16 = cast(dtype = var_580_shape_cast_fp16_to_uint16_dtype_0, x = var_580_shape_cast_fp16)[name = string("cast_147")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_580_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_146")]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")]; + tensor concat_83 = const()[name = string("concat_83"), val = tensor([13, 0, 0, 0])]; + tensor concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor([0])]; + tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; + tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; + int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; + bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; + tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")]; + tensor k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")]; + tensor var_585_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_585_shape_cast_fp16")]; + int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)]; + int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)]; + bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)]; + string var_585_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_585_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)]; + tensor var_585_shape_cast_fp16_to_uint16 = cast(dtype = var_585_shape_cast_fp16_to_uint16_dtype_0, x = var_585_shape_cast_fp16)[name = string("cast_145")]; + uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_585_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")]; + string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor([0])]; + int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_144")]; + tensor expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([13, 0, 0, 0])]; + tensor concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor([0])]; + tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; + tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; + int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; + bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; + tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")]; + tensor v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")]; + tensor var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128491840)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_607_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = string("op_611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131768704)))]; + tensor var_612_to_fp16 = const()[name = string("op_612_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135045568)))]; + tensor linear_29_cast_fp16 = linear(bias = var_612_to_fp16, weight = var_611_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")]; + tensor var_614_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_614_shape_cast_fp16")]; + int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)]; + int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)]; + bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)]; + string var_614_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_614_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)]; + tensor var_614_shape_cast_fp16_to_uint16 = cast(dtype = var_614_shape_cast_fp16_to_uint16_dtype_0, x = var_614_shape_cast_fp16)[name = string("cast_143")]; + uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_614_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")]; + string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor([0])]; + int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_142")]; + tensor expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([14, 0, 0, 0])]; + tensor concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor([0])]; + tensor concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor([0])]; + tensor concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor([0])]; + int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; + bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; + tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")]; + tensor k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")]; + tensor var_619_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_619_shape_cast_fp16")]; + int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)]; + int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)]; + bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)]; + string var_619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)]; + tensor var_619_shape_cast_fp16_to_uint16 = cast(dtype = var_619_shape_cast_fp16_to_uint16_dtype_0, x = var_619_shape_cast_fp16)[name = string("cast_141")]; + uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_619_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")]; + string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor([0])]; + int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_140")]; + tensor expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")]; + tensor concat_92 = const()[name = string("concat_92"), val = tensor([14, 0, 0, 0])]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")]; + tensor v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")]; + tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135048192)))]; + tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_641_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")]; + tensor var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138325056)))]; + tensor var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141601920)))]; + tensor linear_31_cast_fp16 = linear(bias = var_646_to_fp16, weight = var_645_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")]; + tensor var_648_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_648_shape_cast_fp16")]; + int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)]; + int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)]; + bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)]; + string var_648_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_648_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)]; + tensor var_648_shape_cast_fp16_to_uint16 = cast(dtype = var_648_shape_cast_fp16_to_uint16_dtype_0, x = var_648_shape_cast_fp16)[name = string("cast_139")]; + uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_648_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")]; + string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_138")]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")]; + tensor concat_95 = const()[name = string("concat_95"), val = tensor([15, 0, 0, 0])]; + tensor concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor([0])]; + tensor concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor([0])]; + tensor concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor([0])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")]; + tensor k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = k_cache2)[name = string("coreml_update_state_98")]; + tensor var_653_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_653_shape_cast_fp16")]; + int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)]; + int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)]; + bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)]; + string var_653_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_653_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)]; + tensor var_653_shape_cast_fp16_to_uint16 = cast(dtype = var_653_shape_cast_fp16_to_uint16_dtype_0, x = var_653_shape_cast_fp16)[name = string("cast_137")]; + uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_653_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")]; + string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor([0])]; + int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_136")]; + tensor expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")]; + tensor concat_98 = const()[name = string("concat_98"), val = tensor([15, 0, 0, 0])]; + tensor concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor([0])]; + tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; + tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; + int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; + bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; + tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")]; + tensor v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = v_cache2)[name = string("coreml_update_state_99")]; + tensor var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141604544)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_675_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144881408)))]; + tensor var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148158272)))]; + tensor linear_33_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")]; + tensor var_682_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_682_shape_cast_fp16")]; + int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)]; + int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)]; + bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)]; + string var_682_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_682_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)]; + tensor var_682_shape_cast_fp16_to_uint16 = cast(dtype = var_682_shape_cast_fp16_to_uint16_dtype_0, x = var_682_shape_cast_fp16)[name = string("cast_135")]; + uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_682_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")]; + string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor([0])]; + int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_134")]; + tensor expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")]; + tensor concat_101 = const()[name = string("concat_101"), val = tensor([16, 0, 0, 0])]; + tensor concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor([0])]; + tensor concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor([0])]; + tensor concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor([0])]; + int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; + bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; + tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")]; + tensor k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_98)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = k_cache2)[name = string("coreml_update_state_100")]; + tensor var_687_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_687_shape_cast_fp16")]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_687_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_687_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)]; + tensor var_687_shape_cast_fp16_to_uint16 = cast(dtype = var_687_shape_cast_fp16_to_uint16_dtype_0, x = var_687_shape_cast_fp16)[name = string("cast_133")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_687_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor([0])]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_132")]; + tensor expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")]; + tensor concat_104 = const()[name = string("concat_104"), val = tensor([16, 0, 0, 0])]; + tensor concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor([0])]; + tensor concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor([0])]; + tensor concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor([0])]; + int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)]; + bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)]; + tensor concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")]; + tensor v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_99)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = v_cache2)[name = string("coreml_update_state_101")]; + tensor var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148160896)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_709_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")]; + tensor var_713_to_fp16 = const()[name = string("op_713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151437760)))]; + tensor var_714_to_fp16 = const()[name = string("op_714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154714624)))]; + tensor linear_35_cast_fp16 = linear(bias = var_714_to_fp16, weight = var_713_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")]; + tensor var_716_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_716_shape_cast_fp16")]; + int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)]; + int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)]; + bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)]; + string var_716_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_716_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)]; + tensor var_716_shape_cast_fp16_to_uint16 = cast(dtype = var_716_shape_cast_fp16_to_uint16_dtype_0, x = var_716_shape_cast_fp16)[name = string("cast_131")]; + uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_716_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")]; + string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_130")]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([17, 0, 0, 0])]; + tensor concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor([0])]; + tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; + tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; + int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; + bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")]; + tensor k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_100)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = k_cache2)[name = string("coreml_update_state_102")]; + tensor var_721_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_721_shape_cast_fp16")]; + int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)]; + int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)]; + bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)]; + string var_721_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_721_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)]; + tensor var_721_shape_cast_fp16_to_uint16 = cast(dtype = var_721_shape_cast_fp16_to_uint16_dtype_0, x = var_721_shape_cast_fp16)[name = string("cast_129")]; + uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_721_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")]; + string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor([0])]; + int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_128")]; + tensor expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")]; + tensor concat_110 = const()[name = string("concat_110"), val = tensor([17, 0, 0, 0])]; + tensor concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor([0])]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")]; + tensor v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_101)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = v_cache2)[name = string("coreml_update_state_103")]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154717248)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_743_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")]; + tensor var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157994112)))]; + tensor var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161270976)))]; + tensor linear_37_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")]; + tensor var_750_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_750_shape_cast_fp16")]; + int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)]; + int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)]; + bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)]; + string var_750_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_750_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)]; + tensor var_750_shape_cast_fp16_to_uint16 = cast(dtype = var_750_shape_cast_fp16_to_uint16_dtype_0, x = var_750_shape_cast_fp16)[name = string("cast_127")]; + uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_750_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")]; + string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor([0])]; + int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_126")]; + tensor expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")]; + tensor concat_113 = const()[name = string("concat_113"), val = tensor([18, 0, 0, 0])]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([0])]; + tensor concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor([0])]; + tensor concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor([0])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")]; + tensor k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_102)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = k_cache2)[name = string("coreml_update_state_104")]; + tensor var_755_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_755_shape_cast_fp16")]; + int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; + int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; + bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; + string var_755_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_755_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)]; + tensor var_755_shape_cast_fp16_to_uint16 = cast(dtype = var_755_shape_cast_fp16_to_uint16_dtype_0, x = var_755_shape_cast_fp16)[name = string("cast_125")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_755_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; + string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor([0])]; + int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_124")]; + tensor expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")]; + tensor concat_116 = const()[name = string("concat_116"), val = tensor([18, 0, 0, 0])]; + tensor concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor([0])]; + tensor concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor([0])]; + tensor concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor([0])]; + int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)]; + bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)]; + tensor concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")]; + tensor v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_103)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = v_cache2)[name = string("coreml_update_state_105")]; + tensor var_777_to_fp16 = const()[name = string("op_777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161273600)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_777_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")]; + tensor var_781_to_fp16 = const()[name = string("op_781_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164550464)))]; + tensor var_782_to_fp16 = const()[name = string("op_782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167827328)))]; + tensor linear_39_cast_fp16 = linear(bias = var_782_to_fp16, weight = var_781_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")]; + tensor var_784_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_784_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_784_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_784_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_784_shape_cast_fp16_to_uint16 = cast(dtype = var_784_shape_cast_fp16_to_uint16_dtype_0, x = var_784_shape_cast_fp16)[name = string("cast_123")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_784_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_122")]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")]; + tensor concat_119 = const()[name = string("concat_119"), val = tensor([19, 0, 0, 0])]; + tensor concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor([0])]; + tensor concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor([0])]; + tensor concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor([0])]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")]; + tensor k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_104)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = k_cache2)[name = string("coreml_update_state_106")]; + tensor var_789_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_789_shape_cast_fp16")]; + int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; + int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; + bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; + string var_789_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_789_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)]; + tensor var_789_shape_cast_fp16_to_uint16 = cast(dtype = var_789_shape_cast_fp16_to_uint16_dtype_0, x = var_789_shape_cast_fp16)[name = string("cast_121")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_789_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; + string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor([0])]; + int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_120")]; + tensor expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")]; + tensor concat_122 = const()[name = string("concat_122"), val = tensor([19, 0, 0, 0])]; + tensor concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor([0])]; + tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; + tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; + int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; + bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; + tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")]; + tensor v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_105)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = v_cache2)[name = string("coreml_update_state_107")]; + tensor var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167829952)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_811_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")]; + tensor var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171106816)))]; + tensor var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174383680)))]; + tensor linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")]; + tensor var_818_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_818_shape_cast_fp16")]; + int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)]; + int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)]; + bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)]; + string var_818_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_818_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)]; + tensor var_818_shape_cast_fp16_to_uint16 = cast(dtype = var_818_shape_cast_fp16_to_uint16_dtype_0, x = var_818_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_818_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")]; + string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor([0])]; + int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_118")]; + tensor expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([20, 0, 0, 0])]; + tensor concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor([0])]; + tensor concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor([0])]; + tensor concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor([0])]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")]; + tensor k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_106)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = k_cache2)[name = string("coreml_update_state_108")]; + tensor var_823_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_823_shape_cast_fp16")]; + int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; + int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; + bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; + string var_823_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_823_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; + tensor var_823_shape_cast_fp16_to_uint16 = cast(dtype = var_823_shape_cast_fp16_to_uint16_dtype_0, x = var_823_shape_cast_fp16)[name = string("cast_117")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_823_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; + string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor([0])]; + int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_116")]; + tensor expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([20, 0, 0, 0])]; + tensor concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor([0])]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")]; + tensor v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_107)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = v_cache2)[name = string("coreml_update_state_109")]; + tensor var_845_to_fp16 = const()[name = string("op_845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174386304)))]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_845_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")]; + tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177663168)))]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180940032)))]; + tensor linear_43_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")]; + tensor var_852_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_852_shape_cast_fp16")]; + int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)]; + int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)]; + bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)]; + string var_852_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_852_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)]; + tensor var_852_shape_cast_fp16_to_uint16 = cast(dtype = var_852_shape_cast_fp16_to_uint16_dtype_0, x = var_852_shape_cast_fp16)[name = string("cast_115")]; + uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_852_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")]; + string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_114")]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([21, 0, 0, 0])]; + tensor concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor([0])]; + tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; + tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")]; + tensor k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_108)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = k_cache2)[name = string("coreml_update_state_110")]; + tensor var_857_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_857_shape_cast_fp16")]; + int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)]; + int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)]; + bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)]; + string var_857_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_857_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)]; + tensor var_857_shape_cast_fp16_to_uint16 = cast(dtype = var_857_shape_cast_fp16_to_uint16_dtype_0, x = var_857_shape_cast_fp16)[name = string("cast_113")]; + uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_857_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")]; + string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor([0])]; + int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_112")]; + tensor expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")]; + tensor concat_134 = const()[name = string("concat_134"), val = tensor([21, 0, 0, 0])]; + tensor concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor([0])]; + tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; + tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; + int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; + bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; + tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")]; + tensor v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_109)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = v_cache2)[name = string("coreml_update_state_111")]; + tensor var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180942656)))]; + tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_879_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")]; + tensor var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184219520)))]; + tensor var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187496384)))]; + tensor linear_45_cast_fp16 = linear(bias = var_884_to_fp16, weight = var_883_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")]; + tensor var_886_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_886_shape_cast_fp16")]; + int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)]; + int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)]; + bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)]; + string var_886_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_886_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)]; + tensor var_886_shape_cast_fp16_to_uint16 = cast(dtype = var_886_shape_cast_fp16_to_uint16_dtype_0, x = var_886_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_886_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")]; + string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor([0])]; + int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_110")]; + tensor expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")]; + tensor concat_137 = const()[name = string("concat_137"), val = tensor([22, 0, 0, 0])]; + tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([0])]; + tensor concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor([0])]; + tensor concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor([0])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")]; + tensor k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_110)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = k_cache2)[name = string("coreml_update_state_112")]; + tensor var_891_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_891_shape_cast_fp16")]; + int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)]; + int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)]; + bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)]; + string var_891_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_891_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)]; + tensor var_891_shape_cast_fp16_to_uint16 = cast(dtype = var_891_shape_cast_fp16_to_uint16_dtype_0, x = var_891_shape_cast_fp16)[name = string("cast_109")]; + uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_891_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")]; + string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor([0])]; + int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_108")]; + tensor expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")]; + tensor concat_140 = const()[name = string("concat_140"), val = tensor([22, 0, 0, 0])]; + tensor concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor([0])]; + tensor concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor([0])]; + tensor concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor([0])]; + int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)]; + bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)]; + tensor concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")]; + tensor v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_111)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = v_cache2)[name = string("coreml_update_state_113")]; + tensor var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187499008)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_913_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")]; + tensor var_917_to_fp16 = const()[name = string("op_917_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190775872)))]; + tensor var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194052736)))]; + tensor linear_47_cast_fp16 = linear(bias = var_918_to_fp16, weight = var_917_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")]; + tensor var_920_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_920_shape_cast_fp16")]; + int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)]; + int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)]; + bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)]; + string var_920_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_920_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)]; + tensor var_920_shape_cast_fp16_to_uint16 = cast(dtype = var_920_shape_cast_fp16_to_uint16_dtype_0, x = var_920_shape_cast_fp16)[name = string("cast_107")]; + uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_920_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")]; + string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_106")]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([23, 0, 0, 0])]; + tensor concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor([0])]; + tensor concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor([0])]; + tensor concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor([0])]; + int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)]; + bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)]; + tensor concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")]; + tensor k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_112)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = k_cache2)[name = string("coreml_update_state_114")]; + tensor var_925_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_925_shape_cast_fp16")]; + int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)]; + int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)]; + bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)]; + string var_925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)]; + tensor var_925_shape_cast_fp16_to_uint16 = cast(dtype = var_925_shape_cast_fp16_to_uint16_dtype_0, x = var_925_shape_cast_fp16)[name = string("cast_105")]; + uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_925_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")]; + string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor([0])]; + int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_104")]; + tensor expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")]; + tensor concat_146 = const()[name = string("concat_146"), val = tensor([23, 0, 0, 0])]; + tensor concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor([0])]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")]; + tensor v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_113)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = v_cache2)[name = string("coreml_update_state_115")]; + tensor var_947_to_fp16 = const()[name = string("op_947_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194055360)))]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_947_to_fp16, x = audio_data)[name = string("linear_48_cast_fp16")]; + tensor var_951_to_fp16 = const()[name = string("op_951_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197332224)))]; + tensor var_952_to_fp16 = const()[name = string("op_952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200609088)))]; + tensor linear_49_cast_fp16 = linear(bias = var_952_to_fp16, weight = var_951_to_fp16, x = audio_data)[name = string("linear_49_cast_fp16")]; + tensor var_954_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_954_shape_cast_fp16")]; + int32 gather_48_axis_0 = const()[name = string("gather_48_axis_0"), val = int32(0)]; + int32 gather_48_batch_dims_0 = const()[name = string("gather_48_batch_dims_0"), val = int32(0)]; + bool gather_48_validate_indices_0 = const()[name = string("gather_48_validate_indices_0"), val = bool(false)]; + string var_954_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_954_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_48_to_uint16 = const()[name = string("select_48_to_uint16"), val = uint16(1)]; + tensor var_954_shape_cast_fp16_to_uint16 = cast(dtype = var_954_shape_cast_fp16_to_uint16_dtype_0, x = var_954_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_48_cast_uint16 = gather(axis = gather_48_axis_0, batch_dims = gather_48_batch_dims_0, indices = select_48_to_uint16, validate_indices = gather_48_validate_indices_0, x = var_954_shape_cast_fp16_to_uint16)[name = string("gather_48_cast_uint16")]; + string gather_48_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_48_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_203_axes_0 = const()[name = string("expand_dims_203_axes_0"), val = tensor([0])]; + int32 gather_48_cast_uint16_to_int32 = cast(dtype = gather_48_cast_uint16_to_int32_dtype_0, x = gather_48_cast_uint16)[name = string("cast_102")]; + tensor expand_dims_203 = expand_dims(axes = expand_dims_203_axes_0, x = gather_48_cast_uint16_to_int32)[name = string("expand_dims_203")]; + tensor concat_149 = const()[name = string("concat_149"), val = tensor([24, 0, 0, 0])]; + tensor concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = tensor([0])]; + tensor concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = tensor([0])]; + tensor concat_150_values3_0 = const()[name = string("concat_150_values3_0"), val = tensor([0])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, expand_dims_203, concat_150_values3_0))[name = string("concat_150")]; + tensor k_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_149, begin_mask = k_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_150, end_mask = k_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_25_stride_0, update = linear_48_cast_fp16, x = coreml_update_state_114)[name = string("k_cache2_internal_tensor_assign_25_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_25_cast_fp16, input = k_cache2)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = k_cache2)[name = string("coreml_update_state_116")]; + tensor var_959_shape_cast_fp16 = shape(x = linear_49_cast_fp16)[name = string("op_959_shape_cast_fp16")]; + int32 gather_49_axis_0 = const()[name = string("gather_49_axis_0"), val = int32(0)]; + int32 gather_49_batch_dims_0 = const()[name = string("gather_49_batch_dims_0"), val = int32(0)]; + bool gather_49_validate_indices_0 = const()[name = string("gather_49_validate_indices_0"), val = bool(false)]; + string var_959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_49_to_uint16 = const()[name = string("select_49_to_uint16"), val = uint16(1)]; + tensor var_959_shape_cast_fp16_to_uint16 = cast(dtype = var_959_shape_cast_fp16_to_uint16_dtype_0, x = var_959_shape_cast_fp16)[name = string("cast_101")]; + uint16 gather_49_cast_uint16 = gather(axis = gather_49_axis_0, batch_dims = gather_49_batch_dims_0, indices = select_49_to_uint16, validate_indices = gather_49_validate_indices_0, x = var_959_shape_cast_fp16_to_uint16)[name = string("gather_49_cast_uint16")]; + string gather_49_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_49_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_207_axes_0 = const()[name = string("expand_dims_207_axes_0"), val = tensor([0])]; + int32 gather_49_cast_uint16_to_int32 = cast(dtype = gather_49_cast_uint16_to_int32_dtype_0, x = gather_49_cast_uint16)[name = string("cast_100")]; + tensor expand_dims_207 = expand_dims(axes = expand_dims_207_axes_0, x = gather_49_cast_uint16_to_int32)[name = string("expand_dims_207")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([24, 0, 0, 0])]; + tensor concat_153_values0_0 = const()[name = string("concat_153_values0_0"), val = tensor([0])]; + tensor concat_153_values1_0 = const()[name = string("concat_153_values1_0"), val = tensor([0])]; + tensor concat_153_values3_0 = const()[name = string("concat_153_values3_0"), val = tensor([0])]; + int32 concat_153_axis_0 = const()[name = string("concat_153_axis_0"), val = int32(0)]; + bool concat_153_interleave_0 = const()[name = string("concat_153_interleave_0"), val = bool(false)]; + tensor concat_153 = concat(axis = concat_153_axis_0, interleave = concat_153_interleave_0, values = (concat_153_values0_0, concat_153_values1_0, expand_dims_207, concat_153_values3_0))[name = string("concat_153")]; + tensor v_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_153, end_mask = v_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_25_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_115)[name = string("v_cache2_internal_tensor_assign_25_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_25_cast_fp16, input = v_cache2)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = v_cache2)[name = string("coreml_update_state_117")]; + tensor var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200611712)))]; + tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_981_to_fp16, x = audio_data)[name = string("linear_50_cast_fp16")]; + tensor var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203888576)))]; + tensor var_986_to_fp16 = const()[name = string("op_986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207165440)))]; + tensor linear_51_cast_fp16 = linear(bias = var_986_to_fp16, weight = var_985_to_fp16, x = audio_data)[name = string("linear_51_cast_fp16")]; + tensor var_988_shape_cast_fp16 = shape(x = linear_50_cast_fp16)[name = string("op_988_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_988_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_988_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_988_shape_cast_fp16_to_uint16 = cast(dtype = var_988_shape_cast_fp16_to_uint16_dtype_0, x = var_988_shape_cast_fp16)[name = string("cast_99")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_988_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_98")]; + tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = gather_50_cast_uint16_to_int32)[name = string("expand_dims_211")]; + tensor concat_155 = const()[name = string("concat_155"), val = tensor([25, 0, 0, 0])]; + tensor concat_156_values0_0 = const()[name = string("concat_156_values0_0"), val = tensor([0])]; + tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; + tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; + int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; + bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; + tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (concat_156_values0_0, concat_156_values1_0, expand_dims_211, concat_156_values3_0))[name = string("concat_156")]; + tensor k_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_155, begin_mask = k_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_156, end_mask = k_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_26_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_116)[name = string("k_cache2_internal_tensor_assign_26_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_26_cast_fp16, input = k_cache2)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = k_cache2)[name = string("coreml_update_state_118")]; + tensor var_993_shape_cast_fp16 = shape(x = linear_51_cast_fp16)[name = string("op_993_shape_cast_fp16")]; + int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; + int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; + bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; + string var_993_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_993_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(1)]; + tensor var_993_shape_cast_fp16_to_uint16 = cast(dtype = var_993_shape_cast_fp16_to_uint16_dtype_0, x = var_993_shape_cast_fp16)[name = string("cast_97")]; + uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_993_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; + string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_215_axes_0 = const()[name = string("expand_dims_215_axes_0"), val = tensor([0])]; + int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_96")]; + tensor expand_dims_215 = expand_dims(axes = expand_dims_215_axes_0, x = gather_51_cast_uint16_to_int32)[name = string("expand_dims_215")]; + tensor concat_158 = const()[name = string("concat_158"), val = tensor([25, 0, 0, 0])]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_215, concat_159_values3_0))[name = string("concat_159")]; + tensor v_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_159, end_mask = v_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_26_stride_0, update = linear_51_cast_fp16, x = coreml_update_state_117)[name = string("v_cache2_internal_tensor_assign_26_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_26_cast_fp16, input = v_cache2)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = v_cache2)[name = string("coreml_update_state_119")]; + tensor var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207168064)))]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1015_to_fp16, x = audio_data)[name = string("linear_52_cast_fp16")]; + tensor var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210444928)))]; + tensor var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213721792)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = audio_data)[name = string("linear_53_cast_fp16")]; + tensor var_1022_shape_cast_fp16 = shape(x = linear_52_cast_fp16)[name = string("op_1022_shape_cast_fp16")]; + int32 gather_52_axis_0 = const()[name = string("gather_52_axis_0"), val = int32(0)]; + int32 gather_52_batch_dims_0 = const()[name = string("gather_52_batch_dims_0"), val = int32(0)]; + bool gather_52_validate_indices_0 = const()[name = string("gather_52_validate_indices_0"), val = bool(false)]; + string var_1022_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1022_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_52_to_uint16 = const()[name = string("select_52_to_uint16"), val = uint16(1)]; + tensor var_1022_shape_cast_fp16_to_uint16 = cast(dtype = var_1022_shape_cast_fp16_to_uint16_dtype_0, x = var_1022_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_52_cast_uint16 = gather(axis = gather_52_axis_0, batch_dims = gather_52_batch_dims_0, indices = select_52_to_uint16, validate_indices = gather_52_validate_indices_0, x = var_1022_shape_cast_fp16_to_uint16)[name = string("gather_52_cast_uint16")]; + string gather_52_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_52_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_219_axes_0 = const()[name = string("expand_dims_219_axes_0"), val = tensor([0])]; + int32 gather_52_cast_uint16_to_int32 = cast(dtype = gather_52_cast_uint16_to_int32_dtype_0, x = gather_52_cast_uint16)[name = string("cast_94")]; + tensor expand_dims_219 = expand_dims(axes = expand_dims_219_axes_0, x = gather_52_cast_uint16_to_int32)[name = string("expand_dims_219")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([26, 0, 0, 0])]; + tensor concat_162_values0_0 = const()[name = string("concat_162_values0_0"), val = tensor([0])]; + tensor concat_162_values1_0 = const()[name = string("concat_162_values1_0"), val = tensor([0])]; + tensor concat_162_values3_0 = const()[name = string("concat_162_values3_0"), val = tensor([0])]; + int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; + bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; + tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (concat_162_values0_0, concat_162_values1_0, expand_dims_219, concat_162_values3_0))[name = string("concat_162")]; + tensor k_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_161, begin_mask = k_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_162, end_mask = k_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_27_stride_0, update = linear_52_cast_fp16, x = coreml_update_state_118)[name = string("k_cache2_internal_tensor_assign_27_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_27_cast_fp16, input = k_cache2)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_120 = read_state(input = k_cache2)[name = string("coreml_update_state_120")]; + tensor var_1027_shape_cast_fp16 = shape(x = linear_53_cast_fp16)[name = string("op_1027_shape_cast_fp16")]; + int32 gather_53_axis_0 = const()[name = string("gather_53_axis_0"), val = int32(0)]; + int32 gather_53_batch_dims_0 = const()[name = string("gather_53_batch_dims_0"), val = int32(0)]; + bool gather_53_validate_indices_0 = const()[name = string("gather_53_validate_indices_0"), val = bool(false)]; + string var_1027_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1027_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_53_to_uint16 = const()[name = string("select_53_to_uint16"), val = uint16(1)]; + tensor var_1027_shape_cast_fp16_to_uint16 = cast(dtype = var_1027_shape_cast_fp16_to_uint16_dtype_0, x = var_1027_shape_cast_fp16)[name = string("cast_93")]; + uint16 gather_53_cast_uint16 = gather(axis = gather_53_axis_0, batch_dims = gather_53_batch_dims_0, indices = select_53_to_uint16, validate_indices = gather_53_validate_indices_0, x = var_1027_shape_cast_fp16_to_uint16)[name = string("gather_53_cast_uint16")]; + string gather_53_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_53_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_223_axes_0 = const()[name = string("expand_dims_223_axes_0"), val = tensor([0])]; + int32 gather_53_cast_uint16_to_int32 = cast(dtype = gather_53_cast_uint16_to_int32_dtype_0, x = gather_53_cast_uint16)[name = string("cast_92")]; + tensor expand_dims_223 = expand_dims(axes = expand_dims_223_axes_0, x = gather_53_cast_uint16_to_int32)[name = string("expand_dims_223")]; + tensor concat_164 = const()[name = string("concat_164"), val = tensor([26, 0, 0, 0])]; + tensor concat_165_values0_0 = const()[name = string("concat_165_values0_0"), val = tensor([0])]; + tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; + tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (concat_165_values0_0, concat_165_values1_0, expand_dims_223, concat_165_values3_0))[name = string("concat_165")]; + tensor v_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_164, begin_mask = v_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_165, end_mask = v_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_27_stride_0, update = linear_53_cast_fp16, x = coreml_update_state_119)[name = string("v_cache2_internal_tensor_assign_27_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_27_cast_fp16, input = v_cache2)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_121 = read_state(input = v_cache2)[name = string("coreml_update_state_121")]; + tensor var_1049_to_fp16 = const()[name = string("op_1049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213724416)))]; + tensor linear_54_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1049_to_fp16, x = audio_data)[name = string("linear_54_cast_fp16")]; + tensor var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217001280)))]; + tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220278144)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = audio_data)[name = string("linear_55_cast_fp16")]; + tensor var_1056_shape_cast_fp16 = shape(x = linear_54_cast_fp16)[name = string("op_1056_shape_cast_fp16")]; + int32 gather_54_axis_0 = const()[name = string("gather_54_axis_0"), val = int32(0)]; + int32 gather_54_batch_dims_0 = const()[name = string("gather_54_batch_dims_0"), val = int32(0)]; + bool gather_54_validate_indices_0 = const()[name = string("gather_54_validate_indices_0"), val = bool(false)]; + string var_1056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_54_to_uint16 = const()[name = string("select_54_to_uint16"), val = uint16(1)]; + tensor var_1056_shape_cast_fp16_to_uint16 = cast(dtype = var_1056_shape_cast_fp16_to_uint16_dtype_0, x = var_1056_shape_cast_fp16)[name = string("cast_91")]; + uint16 gather_54_cast_uint16 = gather(axis = gather_54_axis_0, batch_dims = gather_54_batch_dims_0, indices = select_54_to_uint16, validate_indices = gather_54_validate_indices_0, x = var_1056_shape_cast_fp16_to_uint16)[name = string("gather_54_cast_uint16")]; + string gather_54_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_54_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; + int32 gather_54_cast_uint16_to_int32 = cast(dtype = gather_54_cast_uint16_to_int32_dtype_0, x = gather_54_cast_uint16)[name = string("cast_90")]; + tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = gather_54_cast_uint16_to_int32)[name = string("expand_dims_227")]; + tensor concat_167 = const()[name = string("concat_167"), val = tensor([27, 0, 0, 0])]; + tensor concat_168_values0_0 = const()[name = string("concat_168_values0_0"), val = tensor([0])]; + tensor concat_168_values1_0 = const()[name = string("concat_168_values1_0"), val = tensor([0])]; + tensor concat_168_values3_0 = const()[name = string("concat_168_values3_0"), val = tensor([0])]; + int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; + bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; + tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (concat_168_values0_0, concat_168_values1_0, expand_dims_227, concat_168_values3_0))[name = string("concat_168")]; + tensor k_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_167, begin_mask = k_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_168, end_mask = k_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_28_stride_0, update = linear_54_cast_fp16, x = coreml_update_state_120)[name = string("k_cache2_internal_tensor_assign_28_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_28_cast_fp16, input = k_cache2)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_122 = read_state(input = k_cache2)[name = string("coreml_update_state_122")]; + tensor var_1061_shape_cast_fp16 = shape(x = linear_55_cast_fp16)[name = string("op_1061_shape_cast_fp16")]; + int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; + int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; + bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; + string var_1061_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1061_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(1)]; + tensor var_1061_shape_cast_fp16_to_uint16 = cast(dtype = var_1061_shape_cast_fp16_to_uint16_dtype_0, x = var_1061_shape_cast_fp16)[name = string("cast_89")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_1061_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; + string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_231_axes_0 = const()[name = string("expand_dims_231_axes_0"), val = tensor([0])]; + int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_88")]; + tensor expand_dims_231 = expand_dims(axes = expand_dims_231_axes_0, x = gather_55_cast_uint16_to_int32)[name = string("expand_dims_231")]; + tensor concat_170 = const()[name = string("concat_170"), val = tensor([27, 0, 0, 0])]; + tensor concat_171_values0_0 = const()[name = string("concat_171_values0_0"), val = tensor([0])]; + tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; + tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; + int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; + bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; + tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (concat_171_values0_0, concat_171_values1_0, expand_dims_231, concat_171_values3_0))[name = string("concat_171")]; + tensor v_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_170, begin_mask = v_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_171, end_mask = v_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_28_stride_0, update = linear_55_cast_fp16, x = coreml_update_state_121)[name = string("v_cache2_internal_tensor_assign_28_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_28_cast_fp16, input = v_cache2)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_123 = read_state(input = v_cache2)[name = string("coreml_update_state_123")]; + tensor var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220280768)))]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1083_to_fp16, x = audio_data)[name = string("linear_56_cast_fp16")]; + tensor var_1087_to_fp16 = const()[name = string("op_1087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557632)))]; + tensor var_1088_to_fp16 = const()[name = string("op_1088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226834496)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1088_to_fp16, weight = var_1087_to_fp16, x = audio_data)[name = string("linear_57_cast_fp16")]; + tensor var_1090_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1090_shape_cast_fp16")]; + int32 gather_56_axis_0 = const()[name = string("gather_56_axis_0"), val = int32(0)]; + int32 gather_56_batch_dims_0 = const()[name = string("gather_56_batch_dims_0"), val = int32(0)]; + bool gather_56_validate_indices_0 = const()[name = string("gather_56_validate_indices_0"), val = bool(false)]; + string var_1090_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1090_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_56_to_uint16 = const()[name = string("select_56_to_uint16"), val = uint16(1)]; + tensor var_1090_shape_cast_fp16_to_uint16 = cast(dtype = var_1090_shape_cast_fp16_to_uint16_dtype_0, x = var_1090_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_56_cast_uint16 = gather(axis = gather_56_axis_0, batch_dims = gather_56_batch_dims_0, indices = select_56_to_uint16, validate_indices = gather_56_validate_indices_0, x = var_1090_shape_cast_fp16_to_uint16)[name = string("gather_56_cast_uint16")]; + string gather_56_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_56_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_235_axes_0 = const()[name = string("expand_dims_235_axes_0"), val = tensor([0])]; + int32 gather_56_cast_uint16_to_int32 = cast(dtype = gather_56_cast_uint16_to_int32_dtype_0, x = gather_56_cast_uint16)[name = string("cast_86")]; + tensor expand_dims_235 = expand_dims(axes = expand_dims_235_axes_0, x = gather_56_cast_uint16_to_int32)[name = string("expand_dims_235")]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([28, 0, 0, 0])]; + tensor concat_174_values0_0 = const()[name = string("concat_174_values0_0"), val = tensor([0])]; + tensor concat_174_values1_0 = const()[name = string("concat_174_values1_0"), val = tensor([0])]; + tensor concat_174_values3_0 = const()[name = string("concat_174_values3_0"), val = tensor([0])]; + int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; + bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; + tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (concat_174_values0_0, concat_174_values1_0, expand_dims_235, concat_174_values3_0))[name = string("concat_174")]; + tensor k_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_173, begin_mask = k_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_174, end_mask = k_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_29_stride_0, update = linear_56_cast_fp16, x = coreml_update_state_122)[name = string("k_cache2_internal_tensor_assign_29_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_29_cast_fp16, input = k_cache2)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_124 = read_state(input = k_cache2)[name = string("coreml_update_state_124")]; + tensor var_1095_shape_cast_fp16 = shape(x = linear_57_cast_fp16)[name = string("op_1095_shape_cast_fp16")]; + int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; + int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; + bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; + string var_1095_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1095_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(1)]; + tensor var_1095_shape_cast_fp16_to_uint16 = cast(dtype = var_1095_shape_cast_fp16_to_uint16_dtype_0, x = var_1095_shape_cast_fp16)[name = string("cast_85")]; + uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_1095_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; + string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_239_axes_0 = const()[name = string("expand_dims_239_axes_0"), val = tensor([0])]; + int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_84")]; + tensor expand_dims_239 = expand_dims(axes = expand_dims_239_axes_0, x = gather_57_cast_uint16_to_int32)[name = string("expand_dims_239")]; + tensor concat_176 = const()[name = string("concat_176"), val = tensor([28, 0, 0, 0])]; + tensor concat_177_values0_0 = const()[name = string("concat_177_values0_0"), val = tensor([0])]; + tensor concat_177_values1_0 = const()[name = string("concat_177_values1_0"), val = tensor([0])]; + tensor concat_177_values3_0 = const()[name = string("concat_177_values3_0"), val = tensor([0])]; + int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)]; + bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)]; + tensor concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (concat_177_values0_0, concat_177_values1_0, expand_dims_239, concat_177_values3_0))[name = string("concat_177")]; + tensor v_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_176, begin_mask = v_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_177, end_mask = v_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_29_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_123)[name = string("v_cache2_internal_tensor_assign_29_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_29_cast_fp16, input = v_cache2)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_125 = read_state(input = v_cache2)[name = string("coreml_update_state_125")]; + tensor var_1117_to_fp16 = const()[name = string("op_1117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226837120)))]; + tensor linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1117_to_fp16, x = audio_data)[name = string("linear_58_cast_fp16")]; + tensor var_1121_to_fp16 = const()[name = string("op_1121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230113984)))]; + tensor var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233390848)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1122_to_fp16, weight = var_1121_to_fp16, x = audio_data)[name = string("linear_59_cast_fp16")]; + tensor var_1124_shape_cast_fp16 = shape(x = linear_58_cast_fp16)[name = string("op_1124_shape_cast_fp16")]; + int32 gather_58_axis_0 = const()[name = string("gather_58_axis_0"), val = int32(0)]; + int32 gather_58_batch_dims_0 = const()[name = string("gather_58_batch_dims_0"), val = int32(0)]; + bool gather_58_validate_indices_0 = const()[name = string("gather_58_validate_indices_0"), val = bool(false)]; + string var_1124_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1124_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_58_to_uint16 = const()[name = string("select_58_to_uint16"), val = uint16(1)]; + tensor var_1124_shape_cast_fp16_to_uint16 = cast(dtype = var_1124_shape_cast_fp16_to_uint16_dtype_0, x = var_1124_shape_cast_fp16)[name = string("cast_83")]; + uint16 gather_58_cast_uint16 = gather(axis = gather_58_axis_0, batch_dims = gather_58_batch_dims_0, indices = select_58_to_uint16, validate_indices = gather_58_validate_indices_0, x = var_1124_shape_cast_fp16_to_uint16)[name = string("gather_58_cast_uint16")]; + string gather_58_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_58_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; + int32 gather_58_cast_uint16_to_int32 = cast(dtype = gather_58_cast_uint16_to_int32_dtype_0, x = gather_58_cast_uint16)[name = string("cast_82")]; + tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = gather_58_cast_uint16_to_int32)[name = string("expand_dims_243")]; + tensor concat_179 = const()[name = string("concat_179"), val = tensor([29, 0, 0, 0])]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([0])]; + tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; + tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, concat_180_values1_0, expand_dims_243, concat_180_values3_0))[name = string("concat_180")]; + tensor k_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_179, begin_mask = k_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_180, end_mask = k_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_30_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_124)[name = string("k_cache2_internal_tensor_assign_30_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_30_cast_fp16, input = k_cache2)[name = string("coreml_update_state_126_write_state")]; + tensor coreml_update_state_126 = read_state(input = k_cache2)[name = string("coreml_update_state_126")]; + tensor var_1129_shape_cast_fp16 = shape(x = linear_59_cast_fp16)[name = string("op_1129_shape_cast_fp16")]; + int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; + int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; + bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; + string var_1129_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1129_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; + tensor var_1129_shape_cast_fp16_to_uint16 = cast(dtype = var_1129_shape_cast_fp16_to_uint16_dtype_0, x = var_1129_shape_cast_fp16)[name = string("cast_81")]; + uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_1129_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; + string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_247_axes_0 = const()[name = string("expand_dims_247_axes_0"), val = tensor([0])]; + int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_80")]; + tensor expand_dims_247 = expand_dims(axes = expand_dims_247_axes_0, x = gather_59_cast_uint16_to_int32)[name = string("expand_dims_247")]; + tensor concat_182 = const()[name = string("concat_182"), val = tensor([29, 0, 0, 0])]; + tensor concat_183_values0_0 = const()[name = string("concat_183_values0_0"), val = tensor([0])]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (concat_183_values0_0, concat_183_values1_0, expand_dims_247, concat_183_values3_0))[name = string("concat_183")]; + tensor v_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_182, begin_mask = v_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_183, end_mask = v_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_30_stride_0, update = linear_59_cast_fp16, x = coreml_update_state_125)[name = string("v_cache2_internal_tensor_assign_30_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_30_cast_fp16, input = v_cache2)[name = string("coreml_update_state_127_write_state")]; + tensor coreml_update_state_127 = read_state(input = v_cache2)[name = string("coreml_update_state_127")]; + tensor var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233393472)))]; + tensor linear_60_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1151_to_fp16, x = audio_data)[name = string("linear_60_cast_fp16")]; + tensor var_1155_to_fp16 = const()[name = string("op_1155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236670336)))]; + tensor var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239947200)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1156_to_fp16, weight = var_1155_to_fp16, x = audio_data)[name = string("linear_61_cast_fp16")]; + tensor var_1158_shape_cast_fp16 = shape(x = linear_60_cast_fp16)[name = string("op_1158_shape_cast_fp16")]; + int32 gather_60_axis_0 = const()[name = string("gather_60_axis_0"), val = int32(0)]; + int32 gather_60_batch_dims_0 = const()[name = string("gather_60_batch_dims_0"), val = int32(0)]; + bool gather_60_validate_indices_0 = const()[name = string("gather_60_validate_indices_0"), val = bool(false)]; + string var_1158_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1158_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_60_to_uint16 = const()[name = string("select_60_to_uint16"), val = uint16(1)]; + tensor var_1158_shape_cast_fp16_to_uint16 = cast(dtype = var_1158_shape_cast_fp16_to_uint16_dtype_0, x = var_1158_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_60_cast_uint16 = gather(axis = gather_60_axis_0, batch_dims = gather_60_batch_dims_0, indices = select_60_to_uint16, validate_indices = gather_60_validate_indices_0, x = var_1158_shape_cast_fp16_to_uint16)[name = string("gather_60_cast_uint16")]; + string gather_60_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_60_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_251_axes_0 = const()[name = string("expand_dims_251_axes_0"), val = tensor([0])]; + int32 gather_60_cast_uint16_to_int32 = cast(dtype = gather_60_cast_uint16_to_int32_dtype_0, x = gather_60_cast_uint16)[name = string("cast_78")]; + tensor expand_dims_251 = expand_dims(axes = expand_dims_251_axes_0, x = gather_60_cast_uint16_to_int32)[name = string("expand_dims_251")]; + tensor concat_185 = const()[name = string("concat_185"), val = tensor([30, 0, 0, 0])]; + tensor concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = tensor([0])]; + tensor concat_186_values1_0 = const()[name = string("concat_186_values1_0"), val = tensor([0])]; + tensor concat_186_values3_0 = const()[name = string("concat_186_values3_0"), val = tensor([0])]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, concat_186_values1_0, expand_dims_251, concat_186_values3_0))[name = string("concat_186")]; + tensor k_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_185, begin_mask = k_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_186, end_mask = k_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_31_stride_0, update = linear_60_cast_fp16, x = coreml_update_state_126)[name = string("k_cache2_internal_tensor_assign_31_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_31_cast_fp16, input = k_cache2)[name = string("coreml_update_state_128_write_state")]; + tensor coreml_update_state_128 = read_state(input = k_cache2)[name = string("coreml_update_state_128")]; + tensor var_1163_shape_cast_fp16 = shape(x = linear_61_cast_fp16)[name = string("op_1163_shape_cast_fp16")]; + int32 gather_61_axis_0 = const()[name = string("gather_61_axis_0"), val = int32(0)]; + int32 gather_61_batch_dims_0 = const()[name = string("gather_61_batch_dims_0"), val = int32(0)]; + bool gather_61_validate_indices_0 = const()[name = string("gather_61_validate_indices_0"), val = bool(false)]; + string var_1163_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1163_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_61_to_uint16 = const()[name = string("select_61_to_uint16"), val = uint16(1)]; + tensor var_1163_shape_cast_fp16_to_uint16 = cast(dtype = var_1163_shape_cast_fp16_to_uint16_dtype_0, x = var_1163_shape_cast_fp16)[name = string("cast_77")]; + uint16 gather_61_cast_uint16 = gather(axis = gather_61_axis_0, batch_dims = gather_61_batch_dims_0, indices = select_61_to_uint16, validate_indices = gather_61_validate_indices_0, x = var_1163_shape_cast_fp16_to_uint16)[name = string("gather_61_cast_uint16")]; + string gather_61_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_61_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_255_axes_0 = const()[name = string("expand_dims_255_axes_0"), val = tensor([0])]; + int32 gather_61_cast_uint16_to_int32 = cast(dtype = gather_61_cast_uint16_to_int32_dtype_0, x = gather_61_cast_uint16)[name = string("cast_76")]; + tensor expand_dims_255 = expand_dims(axes = expand_dims_255_axes_0, x = gather_61_cast_uint16_to_int32)[name = string("expand_dims_255")]; + tensor concat_188 = const()[name = string("concat_188"), val = tensor([30, 0, 0, 0])]; + tensor concat_189_values0_0 = const()[name = string("concat_189_values0_0"), val = tensor([0])]; + tensor concat_189_values1_0 = const()[name = string("concat_189_values1_0"), val = tensor([0])]; + tensor concat_189_values3_0 = const()[name = string("concat_189_values3_0"), val = tensor([0])]; + int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; + bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; + tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (concat_189_values0_0, concat_189_values1_0, expand_dims_255, concat_189_values3_0))[name = string("concat_189")]; + tensor v_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_188, begin_mask = v_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_189, end_mask = v_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_31_stride_0, update = linear_61_cast_fp16, x = coreml_update_state_127)[name = string("v_cache2_internal_tensor_assign_31_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_31_cast_fp16, input = v_cache2)[name = string("coreml_update_state_129_write_state")]; + tensor coreml_update_state_129 = read_state(input = v_cache2)[name = string("coreml_update_state_129")]; + tensor var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239949824)))]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1185_to_fp16, x = audio_data)[name = string("linear_62_cast_fp16")]; + tensor var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243226688)))]; + tensor var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246503552)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1190_to_fp16, weight = var_1189_to_fp16, x = audio_data)[name = string("linear_63_cast_fp16")]; + tensor var_1192_shape_cast_fp16 = shape(x = linear_62_cast_fp16)[name = string("op_1192_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1192_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1192_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1192_shape_cast_fp16_to_uint16 = cast(dtype = var_1192_shape_cast_fp16_to_uint16_dtype_0, x = var_1192_shape_cast_fp16)[name = string("cast_75")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1192_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_74")]; + tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = gather_62_cast_uint16_to_int32)[name = string("expand_dims_259")]; + tensor concat_191 = const()[name = string("concat_191"), val = tensor([31, 0, 0, 0])]; + tensor concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = tensor([0])]; + tensor concat_192_values1_0 = const()[name = string("concat_192_values1_0"), val = tensor([0])]; + tensor concat_192_values3_0 = const()[name = string("concat_192_values3_0"), val = tensor([0])]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, concat_192_values1_0, expand_dims_259, concat_192_values3_0))[name = string("concat_192")]; + tensor k_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_191, begin_mask = k_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_192, end_mask = k_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_32_stride_0, update = linear_62_cast_fp16, x = coreml_update_state_128)[name = string("k_cache2_internal_tensor_assign_32_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_32_cast_fp16, input = k_cache2)[name = string("coreml_update_state_130_write_state")]; + tensor var_1197_shape_cast_fp16 = shape(x = linear_63_cast_fp16)[name = string("op_1197_shape_cast_fp16")]; + int32 gather_63_axis_0 = const()[name = string("gather_63_axis_0"), val = int32(0)]; + int32 gather_63_batch_dims_0 = const()[name = string("gather_63_batch_dims_0"), val = int32(0)]; + bool gather_63_validate_indices_0 = const()[name = string("gather_63_validate_indices_0"), val = bool(false)]; + string var_1197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_63_to_uint16 = const()[name = string("select_63_to_uint16"), val = uint16(1)]; + tensor var_1197_shape_cast_fp16_to_uint16 = cast(dtype = var_1197_shape_cast_fp16_to_uint16_dtype_0, x = var_1197_shape_cast_fp16)[name = string("cast_73")]; + uint16 gather_63_cast_uint16 = gather(axis = gather_63_axis_0, batch_dims = gather_63_batch_dims_0, indices = select_63_to_uint16, validate_indices = gather_63_validate_indices_0, x = var_1197_shape_cast_fp16_to_uint16)[name = string("gather_63_cast_uint16")]; + string gather_63_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_63_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_263_axes_0 = const()[name = string("expand_dims_263_axes_0"), val = tensor([0])]; + int32 gather_63_cast_uint16_to_int32 = cast(dtype = gather_63_cast_uint16_to_int32_dtype_0, x = gather_63_cast_uint16)[name = string("cast_72")]; + tensor expand_dims_263 = expand_dims(axes = expand_dims_263_axes_0, x = gather_63_cast_uint16_to_int32)[name = string("expand_dims_263")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([31, 0, 0, 0])]; + tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([0])]; + tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; + tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, concat_195_values1_0, expand_dims_263, concat_195_values3_0))[name = string("concat_195")]; + tensor v_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_194, begin_mask = v_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_195, end_mask = v_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_32_stride_0, update = linear_63_cast_fp16, x = coreml_update_state_129)[name = string("v_cache2_internal_tensor_assign_32_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_32_cast_fp16, input = v_cache2)[name = string("coreml_update_state_131_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/large-v2/decoder_first.mlmodelc/weights/weight.bin b/large-v2/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..eeda721bfad61ab73d33a009d8b13c38c825cc19 --- /dev/null +++ b/large-v2/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac46c34d51832dd11fbc34c772a9a35a5fb4cace68406b7044dd4ba652dca1c +size 246506176 diff --git a/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin b/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..52ba52f49e9149434ffb00691f5e035298ccf6a1 --- /dev/null +++ b/large-v2/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afd3cf2ab05eb2b7268afb62f418b5df01b6b5d60d746bdeec2b5ad8d760f65 +size 243 diff --git a/large-v2/decoder_second.mlmodelc/coremldata.bin b/large-v2/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..06d384eab53d9890f25a9f07a76b3771dcd2b170 --- /dev/null +++ b/large-v2/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5601244df54c60a16c26b761742867d06c6ef440ab8b0776ce5f6d1b4875c95 +size 487 diff --git a/large-v2/decoder_second.mlmodelc/metadata.json b/large-v2/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..722e4912d37eb8c71f0d55eb4ea48b33db80210d --- /dev/null +++ b/large-v2/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 257, + "Ios18.readState" : 66, + "Ios18.expandDims" : 33, + "Ios18.sub" : 1, + "Ios18.matmul" : 128, + "Ios18.gelu" : 32, + "Ios18.gather" : 35, + "Ios18.concat" : 162, + "Shape" : 34, + "Ios18.add" : 161, + "Ios18.sliceUpdate" : 128, + "Ios18.sliceByIndex" : 257, + "Ios18.layerNorm" : 97, + "Ios18.cast" : 68, + "Ios18.transpose" : 256, + "Ios18.writeState" : 64, + "Ios18.reshape" : 256, + "Ios18.softmax" : 64, + "Ios18.mul" : 128 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v2/decoder_second.mlmodelc/model.mil b/large-v2/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..897dc495c4bc16b52a2d3e9398cae3acf9ec05bc --- /dev/null +++ b/large-v2/decoder_second.mlmodelc/model.mil @@ -0,0 +1,6298 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_78_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_78_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_78_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_78_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_78_shape_cast_fp16_to_int16 = cast(dtype = var_78_shape_cast_fp16_to_int16_dtype_0, x = var_78_shape_cast_fp16)[name = string("cast_394")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_78_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_82_shape = shape(x = token_data)[name = string("op_82_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_82_shape_to_uint16_dtype_0 = const()[name = string("op_82_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_82_shape_to_uint16 = cast(dtype = var_82_shape_to_uint16_dtype_0, x = var_82_shape)[name = string("cast_392")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_82_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_391")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_393")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_154_axis_0 = const()[name = string("op_154_axis_0"), val = int32(0)]; + int32 var_154_batch_dims_0 = const()[name = string("op_154_batch_dims_0"), val = int32(0)]; + bool var_154_validate_indices_0 = const()[name = string("op_154_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_154_cast_fp16 = gather(axis = var_154_axis_0, batch_dims = var_154_batch_dims_0, indices = token_data, validate_indices = var_154_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_154_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1280)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_157_end_mask_0 = const()[name = string("op_157_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132774528)))]; + tensor var_157_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_157_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_157_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_154_cast_fp16, y = var_157_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_180 = const()[name = string("op_180"), val = int32(-1)]; + tensor var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133921472)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924096)))]; + fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_198_cast_fp16 = layer_norm(axes = var_198_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_198_cast_fp16")]; + tensor var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133926720)))]; + tensor var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137203584)))]; + tensor linear_0_cast_fp16 = linear(bias = var_210_to_fp16, weight = var_209_to_fp16, x = var_198_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137206208)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140483072)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_213_to_fp16, x = var_198_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140485696)))]; + tensor var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143762560)))]; + tensor linear_2_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_198_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_220_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_220_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_220_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_220_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_220_shape_cast_fp16_to_uint16 = cast(dtype = var_220_shape_cast_fp16_to_uint16_dtype_0, x = var_220_shape_cast_fp16)[name = string("cast_390")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_220_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_389")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1280)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor([0, 0, 0])]; + tensor var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor([true, false, true])]; + tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_10, end_mask = var_236_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_236_cast_fp16")]; + tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 0])]; + tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([true, false, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = concat_10, end_mask = var_239_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 20, 64])]; + tensor var_249_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")]; + tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_249_cast_fp16, y = const_160_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 20, 64])]; + tensor var_256_cast_fp16 = reshape(shape = concat_13x, x = var_236_cast_fp16)[name = string("op_256_cast_fp16")]; + tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_256_cast_fp16, y = const_161_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 20, 64])]; + tensor var_263_cast_fp16 = reshape(shape = concat_14x, x = var_239_cast_fp16)[name = string("op_263_cast_fp16")]; + tensor var_264 = const()[name = string("op_264"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_258 = transpose(perm = transpose_258_perm_0, x = k_5_cast_fp16)[name = string("transpose_638")]; + tensor transpose_257 = transpose(perm = transpose_257_perm_0, x = q_3_cast_fp16)[name = string("transpose_639")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_267_begin_0 = const()[name = string("op_267_begin_0"), val = tensor([0, 0])]; + tensor var_267_end_mask_0 = const()[name = string("op_267_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143765184)))]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = concat_15, end_mask = var_267_end_mask_0, x = mask_to_fp16)[name = string("op_267_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor([0, 0])]; + tensor var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor([true, false])]; + tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = concat_16, end_mask = var_268_end_mask_0, x = var_267_cast_fp16)[name = string("op_268_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_268_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_271_cast_fp16 = softmax(axis = var_180, x = qk_3_cast_fp16)[name = string("op_271_cast_fp16")]; + bool var_273_transpose_x_0 = const()[name = string("op_273_transpose_x_0"), val = bool(false)]; + bool var_273_transpose_y_0 = const()[name = string("op_273_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_264, x = var_263_cast_fp16)[name = string("transpose_640")]; + tensor var_273_cast_fp16 = matmul(transpose_x = var_273_transpose_x_0, transpose_y = var_273_transpose_y_0, x = var_271_cast_fp16, y = v_5_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor var_274 = const()[name = string("op_274"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 1280])]; + tensor var_275_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_637")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_275_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144166656)))]; + tensor var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147443520)))]; + tensor linear_3_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147446144)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147448768)))]; + tensor var_287_cast_fp16 = layer_norm(axes = var_287_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_287_cast_fp16")]; + tensor var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147451392)))]; + tensor var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150728256)))]; + tensor linear_4_cast_fp16 = linear(bias = var_297_to_fp16, weight = var_296_to_fp16, x = var_287_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150730880)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 20, 64])]; + tensor var_317_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_317_cast_fp16")]; + tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_317_cast_fp16, y = const_162_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_323 = const()[name = string("op_323"), val = tensor([1, 1500, 20, -1])]; + tensor var_324_cast_fp16 = reshape(shape = var_323, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_324_cast_fp16")]; + tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_324_cast_fp16, y = const_163_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_330 = const()[name = string("op_330"), val = tensor([1, 1500, 20, -1])]; + tensor var_331_cast_fp16 = reshape(shape = var_330, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_331_cast_fp16")]; + tensor var_332 = const()[name = string("op_332"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_260 = transpose(perm = transpose_260_perm_0, x = k_9_cast_fp16)[name = string("transpose_634")]; + tensor transpose_259 = transpose(perm = transpose_259_perm_0, x = q_7_cast_fp16)[name = string("transpose_635")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_5_cast_fp16")]; + tensor var_336_cast_fp16 = softmax(axis = var_180, x = qk_5_cast_fp16)[name = string("op_336_cast_fp16")]; + bool var_338_transpose_x_0 = const()[name = string("op_338_transpose_x_0"), val = bool(false)]; + bool var_338_transpose_y_0 = const()[name = string("op_338_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_332, x = var_331_cast_fp16)[name = string("transpose_636")]; + tensor var_338_cast_fp16 = matmul(transpose_x = var_338_transpose_x_0, transpose_y = var_338_transpose_y_0, x = var_336_cast_fp16, y = v_9_cast_fp16)[name = string("op_338_cast_fp16")]; + tensor var_339 = const()[name = string("op_339"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 1280])]; + tensor var_340_cast_fp16 = transpose(perm = var_339, x = var_338_cast_fp16)[name = string("transpose_633")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_340_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154570944)))]; + tensor var_345_to_fp16 = const()[name = string("op_345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157847808)))]; + tensor linear_5_cast_fp16 = linear(bias = var_345_to_fp16, weight = var_344_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157850432)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157853056)))]; + tensor var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_352_cast_fp16")]; + tensor var_361_to_fp16 = const()[name = string("op_361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157855680)))]; + tensor var_362_to_fp16 = const()[name = string("op_362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170962944)))]; + tensor linear_6_cast_fp16 = linear(bias = var_362_to_fp16, weight = var_361_to_fp16, x = var_352_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170973248)))]; + tensor var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184080512)))]; + tensor linear_7_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_391 = const()[name = string("op_391"), val = int32(-1)]; + tensor var_409_axes_0 = const()[name = string("op_409_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184083136)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184085760)))]; + fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_409_cast_fp16 = layer_norm(axes = var_409_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_409_cast_fp16")]; + tensor var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184088384)))]; + tensor var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187365248)))]; + tensor linear_8_cast_fp16 = linear(bias = var_421_to_fp16, weight = var_420_to_fp16, x = var_409_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187367872)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_424_to_fp16, x = var_409_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190644736)))]; + tensor var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193921600)))]; + tensor linear_10_cast_fp16 = linear(bias = var_429_to_fp16, weight = var_428_to_fp16, x = var_409_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_431_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_431_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_431_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_431_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_431_shape_cast_fp16_to_uint16 = cast(dtype = var_431_shape_cast_fp16_to_uint16_dtype_0, x = var_431_shape_cast_fp16)[name = string("cast_388")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_431_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_387")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1280)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor([0, 0, 0])]; + tensor var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor([true, false, true])]; + tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_32, end_mask = var_447_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor([0, 0, 0])]; + tensor var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor([true, false, true])]; + tensor var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = concat_32, end_mask = var_450_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_450_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 20, 64])]; + tensor var_460_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_460_cast_fp16")]; + tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_460_cast_fp16, y = const_164_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 20, 64])]; + tensor var_467_cast_fp16 = reshape(shape = concat_35x, x = var_447_cast_fp16)[name = string("op_467_cast_fp16")]; + tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_467_cast_fp16, y = const_165_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 20, 64])]; + tensor var_474_cast_fp16 = reshape(shape = concat_36x, x = var_450_cast_fp16)[name = string("op_474_cast_fp16")]; + tensor var_475 = const()[name = string("op_475"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_262 = transpose(perm = transpose_262_perm_0, x = k_15_cast_fp16)[name = string("transpose_630")]; + tensor transpose_261 = transpose(perm = transpose_261_perm_0, x = q_11_cast_fp16)[name = string("transpose_631")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_478_begin_0 = const()[name = string("op_478_begin_0"), val = tensor([0, 0])]; + tensor var_478_end_mask_0 = const()[name = string("op_478_end_mask_0"), val = tensor([false, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = concat_37, end_mask = var_478_end_mask_0, x = mask_to_fp16)[name = string("op_478_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_479_begin_0 = const()[name = string("op_479_begin_0"), val = tensor([0, 0])]; + tensor var_479_end_mask_0 = const()[name = string("op_479_end_mask_0"), val = tensor([true, false])]; + tensor var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = concat_38, end_mask = var_479_end_mask_0, x = var_478_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_479_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_482_cast_fp16 = softmax(axis = var_391, x = qk_9_cast_fp16)[name = string("op_482_cast_fp16")]; + bool var_484_transpose_x_0 = const()[name = string("op_484_transpose_x_0"), val = bool(false)]; + bool var_484_transpose_y_0 = const()[name = string("op_484_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_475, x = var_474_cast_fp16)[name = string("transpose_632")]; + tensor var_484_cast_fp16 = matmul(transpose_x = var_484_transpose_x_0, transpose_y = var_484_transpose_y_0, x = var_482_cast_fp16, y = v_15_cast_fp16)[name = string("op_484_cast_fp16")]; + tensor var_485 = const()[name = string("op_485"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 1280])]; + tensor var_486_cast_fp16 = transpose(perm = var_485, x = var_484_cast_fp16)[name = string("transpose_629")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_486_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_490_to_fp16 = const()[name = string("op_490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193924224)))]; + tensor var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197201088)))]; + tensor linear_11_cast_fp16 = linear(bias = var_491_to_fp16, weight = var_490_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197203712)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197206336)))]; + tensor var_498_cast_fp16 = layer_norm(axes = var_498_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_498_cast_fp16")]; + tensor var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197208960)))]; + tensor var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200485824)))]; + tensor linear_12_cast_fp16 = linear(bias = var_508_to_fp16, weight = var_507_to_fp16, x = var_498_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 20, 64])]; + tensor var_528_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_528_cast_fp16")]; + tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_528_cast_fp16, y = const_166_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_534 = const()[name = string("op_534"), val = tensor([1, 1500, 20, -1])]; + tensor var_535_cast_fp16 = reshape(shape = var_534, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_535_cast_fp16")]; + tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_535_cast_fp16, y = const_167_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_541 = const()[name = string("op_541"), val = tensor([1, 1500, 20, -1])]; + tensor var_542_cast_fp16 = reshape(shape = var_541, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_542_cast_fp16")]; + tensor var_543 = const()[name = string("op_543"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_264 = transpose(perm = transpose_264_perm_0, x = k_19_cast_fp16)[name = string("transpose_626")]; + tensor transpose_263 = transpose(perm = transpose_263_perm_0, x = q_15_cast_fp16)[name = string("transpose_627")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_11_cast_fp16")]; + tensor var_547_cast_fp16 = softmax(axis = var_391, x = qk_11_cast_fp16)[name = string("op_547_cast_fp16")]; + bool var_549_transpose_x_0 = const()[name = string("op_549_transpose_x_0"), val = bool(false)]; + bool var_549_transpose_y_0 = const()[name = string("op_549_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_543, x = var_542_cast_fp16)[name = string("transpose_628")]; + tensor var_549_cast_fp16 = matmul(transpose_x = var_549_transpose_x_0, transpose_y = var_549_transpose_y_0, x = var_547_cast_fp16, y = v_19_cast_fp16)[name = string("op_549_cast_fp16")]; + tensor var_550 = const()[name = string("op_550"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 1280])]; + tensor var_551_cast_fp16 = transpose(perm = var_550, x = var_549_cast_fp16)[name = string("transpose_625")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_551_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200488448)))]; + tensor var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203765312)))]; + tensor linear_13_cast_fp16 = linear(bias = var_556_to_fp16, weight = var_555_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203767936)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203770560)))]; + tensor var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = string("op_572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203773184)))]; + tensor var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216880448)))]; + tensor linear_14_cast_fp16 = linear(bias = var_573_to_fp16, weight = var_572_to_fp16, x = var_563_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216890752)))]; + tensor var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229998016)))]; + tensor linear_15_cast_fp16 = linear(bias = var_579_to_fp16, weight = var_578_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_602 = const()[name = string("op_602"), val = int32(-1)]; + tensor var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230000640)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230003264)))]; + fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_620_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230005888)))]; + tensor var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233282752)))]; + tensor linear_16_cast_fp16 = linear(bias = var_632_to_fp16, weight = var_631_to_fp16, x = var_620_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = string("op_635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233285376)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_635_to_fp16, x = var_620_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236562240)))]; + tensor var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239839104)))]; + tensor linear_18_cast_fp16 = linear(bias = var_640_to_fp16, weight = var_639_to_fp16, x = var_620_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_642_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_642_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_642_shape_cast_fp16_to_uint16 = cast(dtype = var_642_shape_cast_fp16_to_uint16_dtype_0, x = var_642_shape_cast_fp16)[name = string("cast_386")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_642_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_385")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1280)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor([0, 0, 0])]; + tensor var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor([true, false, true])]; + tensor var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_54, end_mask = var_658_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_658_cast_fp16")]; + tensor var_661_begin_0 = const()[name = string("op_661_begin_0"), val = tensor([0, 0, 0])]; + tensor var_661_end_mask_0 = const()[name = string("op_661_end_mask_0"), val = tensor([true, false, true])]; + tensor var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = concat_54, end_mask = var_661_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 20, 64])]; + tensor var_671_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_671_cast_fp16")]; + tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_671_cast_fp16, y = const_168_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 20, 64])]; + tensor var_678_cast_fp16 = reshape(shape = concat_57x, x = var_658_cast_fp16)[name = string("op_678_cast_fp16")]; + tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_678_cast_fp16, y = const_169_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 20, 64])]; + tensor var_685_cast_fp16 = reshape(shape = concat_58x, x = var_661_cast_fp16)[name = string("op_685_cast_fp16")]; + tensor var_686 = const()[name = string("op_686"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_266 = transpose(perm = transpose_266_perm_0, x = k_25_cast_fp16)[name = string("transpose_622")]; + tensor transpose_265 = transpose(perm = transpose_265_perm_0, x = q_19_cast_fp16)[name = string("transpose_623")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor([0, 0])]; + tensor var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor([false, true])]; + tensor var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = concat_59, end_mask = var_689_end_mask_0, x = mask_to_fp16)[name = string("op_689_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor([0, 0])]; + tensor var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor([true, false])]; + tensor var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = concat_60, end_mask = var_690_end_mask_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_690_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_693_cast_fp16 = softmax(axis = var_602, x = qk_15_cast_fp16)[name = string("op_693_cast_fp16")]; + bool var_695_transpose_x_0 = const()[name = string("op_695_transpose_x_0"), val = bool(false)]; + bool var_695_transpose_y_0 = const()[name = string("op_695_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_686, x = var_685_cast_fp16)[name = string("transpose_624")]; + tensor var_695_cast_fp16 = matmul(transpose_x = var_695_transpose_x_0, transpose_y = var_695_transpose_y_0, x = var_693_cast_fp16, y = v_25_cast_fp16)[name = string("op_695_cast_fp16")]; + tensor var_696 = const()[name = string("op_696"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 1280])]; + tensor var_697_cast_fp16 = transpose(perm = var_696, x = var_695_cast_fp16)[name = string("transpose_621")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_697_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239841728)))]; + tensor var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243118592)))]; + tensor linear_19_cast_fp16 = linear(bias = var_702_to_fp16, weight = var_701_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243121216)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243123840)))]; + tensor var_709_cast_fp16 = layer_norm(axes = var_709_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_709_cast_fp16")]; + tensor var_718_to_fp16 = const()[name = string("op_718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243126464)))]; + tensor var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246403328)))]; + tensor linear_20_cast_fp16 = linear(bias = var_719_to_fp16, weight = var_718_to_fp16, x = var_709_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 20, 64])]; + tensor var_739_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_739_cast_fp16")]; + tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_739_cast_fp16, y = const_170_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_745 = const()[name = string("op_745"), val = tensor([1, 1500, 20, -1])]; + tensor var_746_cast_fp16 = reshape(shape = var_745, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_746_cast_fp16")]; + tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_746_cast_fp16, y = const_171_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_752 = const()[name = string("op_752"), val = tensor([1, 1500, 20, -1])]; + tensor var_753_cast_fp16 = reshape(shape = var_752, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_753_cast_fp16")]; + tensor var_754 = const()[name = string("op_754"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_268 = transpose(perm = transpose_268_perm_0, x = k_29_cast_fp16)[name = string("transpose_618")]; + tensor transpose_267 = transpose(perm = transpose_267_perm_0, x = q_23_cast_fp16)[name = string("transpose_619")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_17_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_602, x = qk_17_cast_fp16)[name = string("op_758_cast_fp16")]; + bool var_760_transpose_x_0 = const()[name = string("op_760_transpose_x_0"), val = bool(false)]; + bool var_760_transpose_y_0 = const()[name = string("op_760_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_754, x = var_753_cast_fp16)[name = string("transpose_620")]; + tensor var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_0, transpose_y = var_760_transpose_y_0, x = var_758_cast_fp16, y = v_29_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor var_761 = const()[name = string("op_761"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 1280])]; + tensor var_762_cast_fp16 = transpose(perm = var_761, x = var_760_cast_fp16)[name = string("transpose_617")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_762_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246405952)))]; + tensor var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249682816)))]; + tensor linear_21_cast_fp16 = linear(bias = var_767_to_fp16, weight = var_766_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_774_axes_0 = const()[name = string("op_774_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249685440)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249688064)))]; + tensor var_774_cast_fp16 = layer_norm(axes = var_774_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor var_783_to_fp16 = const()[name = string("op_783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249690688)))]; + tensor var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262797952)))]; + tensor linear_22_cast_fp16 = linear(bias = var_784_to_fp16, weight = var_783_to_fp16, x = var_774_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_789_to_fp16 = const()[name = string("op_789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262808256)))]; + tensor var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275915520)))]; + tensor linear_23_cast_fp16 = linear(bias = var_790_to_fp16, weight = var_789_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; + tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; + tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; + tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; + int32 var_813 = const()[name = string("op_813"), val = int32(-1)]; + tensor var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275918144)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275920768)))]; + fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_831_cast_fp16 = layer_norm(axes = var_831_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_831_cast_fp16")]; + tensor var_842_to_fp16 = const()[name = string("op_842_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275923392)))]; + tensor var_843_to_fp16 = const()[name = string("op_843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279200256)))]; + tensor linear_24_cast_fp16 = linear(bias = var_843_to_fp16, weight = var_842_to_fp16, x = var_831_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_846_to_fp16 = const()[name = string("op_846_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279202880)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_846_to_fp16, x = var_831_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282479744)))]; + tensor var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285756608)))]; + tensor linear_26_cast_fp16 = linear(bias = var_851_to_fp16, weight = var_850_to_fp16, x = var_831_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_853_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_853_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_384")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_383")]; + int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1280)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; + tensor var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor([0, 0, 0])]; + tensor var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor([true, false, true])]; + tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_76, end_mask = var_869_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor var_872_begin_0 = const()[name = string("op_872_begin_0"), val = tensor([0, 0, 0])]; + tensor var_872_end_mask_0 = const()[name = string("op_872_end_mask_0"), val = tensor([true, false, true])]; + tensor var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = concat_76, end_mask = var_872_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_872_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 20, 64])]; + tensor var_882_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_882_cast_fp16")]; + tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_882_cast_fp16, y = const_172_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 20, 64])]; + tensor var_889_cast_fp16 = reshape(shape = concat_79x, x = var_869_cast_fp16)[name = string("op_889_cast_fp16")]; + tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_889_cast_fp16, y = const_173_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 20, 64])]; + tensor var_896_cast_fp16 = reshape(shape = concat_80x, x = var_872_cast_fp16)[name = string("op_896_cast_fp16")]; + tensor var_897 = const()[name = string("op_897"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_270 = transpose(perm = transpose_270_perm_0, x = k_35_cast_fp16)[name = string("transpose_614")]; + tensor transpose_269 = transpose(perm = transpose_269_perm_0, x = q_27_cast_fp16)[name = string("transpose_615")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_900_begin_0 = const()[name = string("op_900_begin_0"), val = tensor([0, 0])]; + tensor var_900_end_mask_0 = const()[name = string("op_900_end_mask_0"), val = tensor([false, true])]; + tensor var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = concat_81, end_mask = var_900_end_mask_0, x = mask_to_fp16)[name = string("op_900_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_901_begin_0 = const()[name = string("op_901_begin_0"), val = tensor([0, 0])]; + tensor var_901_end_mask_0 = const()[name = string("op_901_end_mask_0"), val = tensor([true, false])]; + tensor var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = concat_82, end_mask = var_901_end_mask_0, x = var_900_cast_fp16)[name = string("op_901_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_901_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_904_cast_fp16 = softmax(axis = var_813, x = qk_21_cast_fp16)[name = string("op_904_cast_fp16")]; + bool var_906_transpose_x_0 = const()[name = string("op_906_transpose_x_0"), val = bool(false)]; + bool var_906_transpose_y_0 = const()[name = string("op_906_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_897, x = var_896_cast_fp16)[name = string("transpose_616")]; + tensor var_906_cast_fp16 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_904_cast_fp16, y = v_35_cast_fp16)[name = string("op_906_cast_fp16")]; + tensor var_907 = const()[name = string("op_907"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 1280])]; + tensor var_908_cast_fp16 = transpose(perm = var_907, x = var_906_cast_fp16)[name = string("transpose_613")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_908_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285759232)))]; + tensor var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289036096)))]; + tensor linear_27_cast_fp16 = linear(bias = var_913_to_fp16, weight = var_912_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289038720)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289041344)))]; + tensor var_920_cast_fp16 = layer_norm(axes = var_920_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_920_cast_fp16")]; + tensor var_929_to_fp16 = const()[name = string("op_929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289043968)))]; + tensor var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292320832)))]; + tensor linear_28_cast_fp16 = linear(bias = var_930_to_fp16, weight = var_929_to_fp16, x = var_920_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 20, 64])]; + tensor var_950_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_950_cast_fp16")]; + tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_950_cast_fp16, y = const_174_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_956 = const()[name = string("op_956"), val = tensor([1, 1500, 20, -1])]; + tensor var_957_cast_fp16 = reshape(shape = var_956, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_957_cast_fp16")]; + tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_957_cast_fp16, y = const_175_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_963 = const()[name = string("op_963"), val = tensor([1, 1500, 20, -1])]; + tensor var_964_cast_fp16 = reshape(shape = var_963, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_964_cast_fp16")]; + tensor var_965 = const()[name = string("op_965"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_272 = transpose(perm = transpose_272_perm_0, x = k_39_cast_fp16)[name = string("transpose_610")]; + tensor transpose_271 = transpose(perm = transpose_271_perm_0, x = q_31_cast_fp16)[name = string("transpose_611")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_23_cast_fp16")]; + tensor var_969_cast_fp16 = softmax(axis = var_813, x = qk_23_cast_fp16)[name = string("op_969_cast_fp16")]; + bool var_971_transpose_x_0 = const()[name = string("op_971_transpose_x_0"), val = bool(false)]; + bool var_971_transpose_y_0 = const()[name = string("op_971_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_965, x = var_964_cast_fp16)[name = string("transpose_612")]; + tensor var_971_cast_fp16 = matmul(transpose_x = var_971_transpose_x_0, transpose_y = var_971_transpose_y_0, x = var_969_cast_fp16, y = v_39_cast_fp16)[name = string("op_971_cast_fp16")]; + tensor var_972 = const()[name = string("op_972"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 1280])]; + tensor var_973_cast_fp16 = transpose(perm = var_972, x = var_971_cast_fp16)[name = string("transpose_609")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_973_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292323456)))]; + tensor var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295600320)))]; + tensor linear_29_cast_fp16 = linear(bias = var_978_to_fp16, weight = var_977_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295602944)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295605568)))]; + tensor var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_985_cast_fp16")]; + tensor var_994_to_fp16 = const()[name = string("op_994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295608192)))]; + tensor var_995_to_fp16 = const()[name = string("op_995_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308715456)))]; + tensor linear_30_cast_fp16 = linear(bias = var_995_to_fp16, weight = var_994_to_fp16, x = var_985_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308725760)))]; + tensor var_1001_to_fp16 = const()[name = string("op_1001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321833024)))]; + tensor linear_31_cast_fp16 = linear(bias = var_1001_to_fp16, weight = var_1000_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; + tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_17_cast_fp16")]; + tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; + tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_17_cast_fp16")]; + tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; + tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; + tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; + tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; + int32 var_1024 = const()[name = string("op_1024"), val = int32(-1)]; + tensor var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321835648)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321838272)))]; + fp16 var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1042_cast_fp16")]; + tensor var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321840896)))]; + tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325117760)))]; + tensor linear_32_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = var_1042_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1057_to_fp16 = const()[name = string("op_1057_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325120384)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1057_to_fp16, x = var_1042_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328397248)))]; + tensor var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331674112)))]; + tensor linear_34_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1042_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor var_1064_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1064_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_382")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_381")]; + int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; + tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; + tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")]; + tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")]; + int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; + int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1280)]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; + tensor var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_98, end_mask = var_1080_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1080_cast_fp16")]; + tensor var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = concat_98, end_mask = var_1083_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1083_cast_fp16")]; + tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 20, 64])]; + tensor var_1093_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1093_cast_fp16")]; + tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1093_cast_fp16, y = const_176_to_fp16)[name = string("q_35_cast_fp16")]; + tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 20, 64])]; + tensor var_1100_cast_fp16 = reshape(shape = concat_101x, x = var_1080_cast_fp16)[name = string("op_1100_cast_fp16")]; + tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_45_cast_fp16 = mul(x = var_1100_cast_fp16, y = const_177_to_fp16)[name = string("k_45_cast_fp16")]; + tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 20, 64])]; + tensor var_1107_cast_fp16 = reshape(shape = concat_102x, x = var_1083_cast_fp16)[name = string("op_1107_cast_fp16")]; + tensor var_1108 = const()[name = string("op_1108"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_274 = transpose(perm = transpose_274_perm_0, x = k_45_cast_fp16)[name = string("transpose_606")]; + tensor transpose_273 = transpose(perm = transpose_273_perm_0, x = q_35_cast_fp16)[name = string("transpose_607")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_25_cast_fp16")]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; + tensor var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor([0, 0])]; + tensor var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor([false, true])]; + tensor var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = concat_103, end_mask = var_1111_end_mask_0, x = mask_to_fp16)[name = string("op_1111_cast_fp16")]; + int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; + tensor var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor([0, 0])]; + tensor var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor([true, false])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = concat_104, end_mask = var_1112_end_mask_0, x = var_1111_cast_fp16)[name = string("op_1112_cast_fp16")]; + tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1112_cast_fp16)[name = string("qk_27_cast_fp16")]; + tensor var_1115_cast_fp16 = softmax(axis = var_1024, x = qk_27_cast_fp16)[name = string("op_1115_cast_fp16")]; + bool var_1117_transpose_x_0 = const()[name = string("op_1117_transpose_x_0"), val = bool(false)]; + bool var_1117_transpose_y_0 = const()[name = string("op_1117_transpose_y_0"), val = bool(false)]; + tensor v_45_cast_fp16 = transpose(perm = var_1108, x = var_1107_cast_fp16)[name = string("transpose_608")]; + tensor var_1117_cast_fp16 = matmul(transpose_x = var_1117_transpose_x_0, transpose_y = var_1117_transpose_y_0, x = var_1115_cast_fp16, y = v_45_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor var_1118 = const()[name = string("op_1118"), val = tensor([0, 2, 1, 3])]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 1280])]; + tensor var_1119_cast_fp16 = transpose(perm = var_1118, x = var_1117_cast_fp16)[name = string("transpose_605")]; + tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1119_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor var_1123_to_fp16 = const()[name = string("op_1123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331676736)))]; + tensor var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334953600)))]; + tensor linear_35_cast_fp16 = linear(bias = var_1124_to_fp16, weight = var_1123_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; + tensor var_1131_axes_0 = const()[name = string("op_1131_axes_0"), val = tensor([-1])]; + tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334956224)))]; + tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334958848)))]; + tensor var_1131_cast_fp16 = layer_norm(axes = var_1131_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1131_cast_fp16")]; + tensor var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334961472)))]; + tensor var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338238336)))]; + tensor linear_36_cast_fp16 = linear(bias = var_1141_to_fp16, weight = var_1140_to_fp16, x = var_1131_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; + tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; + tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; + tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 20, 64])]; + tensor var_1161_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1161_cast_fp16")]; + tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1161_cast_fp16, y = const_178_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1167 = const()[name = string("op_1167"), val = tensor([1, 1500, 20, -1])]; + tensor var_1168_cast_fp16 = reshape(shape = var_1167, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1168_cast_fp16")]; + tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_49_cast_fp16 = mul(x = var_1168_cast_fp16, y = const_179_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_1174 = const()[name = string("op_1174"), val = tensor([1, 1500, 20, -1])]; + tensor var_1175_cast_fp16 = reshape(shape = var_1174, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1175_cast_fp16")]; + tensor var_1176 = const()[name = string("op_1176"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_276 = transpose(perm = transpose_276_perm_0, x = k_49_cast_fp16)[name = string("transpose_602")]; + tensor transpose_275 = transpose(perm = transpose_275_perm_0, x = q_39_cast_fp16)[name = string("transpose_603")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_29_cast_fp16")]; + tensor var_1180_cast_fp16 = softmax(axis = var_1024, x = qk_29_cast_fp16)[name = string("op_1180_cast_fp16")]; + bool var_1182_transpose_x_0 = const()[name = string("op_1182_transpose_x_0"), val = bool(false)]; + bool var_1182_transpose_y_0 = const()[name = string("op_1182_transpose_y_0"), val = bool(false)]; + tensor v_49_cast_fp16 = transpose(perm = var_1176, x = var_1175_cast_fp16)[name = string("transpose_604")]; + tensor var_1182_cast_fp16 = matmul(transpose_x = var_1182_transpose_x_0, transpose_y = var_1182_transpose_y_0, x = var_1180_cast_fp16, y = v_49_cast_fp16)[name = string("op_1182_cast_fp16")]; + tensor var_1183 = const()[name = string("op_1183"), val = tensor([0, 2, 1, 3])]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 1280])]; + tensor var_1184_cast_fp16 = transpose(perm = var_1183, x = var_1182_cast_fp16)[name = string("transpose_601")]; + tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1184_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338240960)))]; + tensor var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341517824)))]; + tensor linear_37_cast_fp16 = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; + tensor var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341520448)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341523072)))]; + tensor var_1196_cast_fp16 = layer_norm(axes = var_1196_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1196_cast_fp16")]; + tensor var_1205_to_fp16 = const()[name = string("op_1205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341525696)))]; + tensor var_1206_to_fp16 = const()[name = string("op_1206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354632960)))]; + tensor linear_38_cast_fp16 = linear(bias = var_1206_to_fp16, weight = var_1205_to_fp16, x = var_1196_cast_fp16)[name = string("linear_38_cast_fp16")]; + string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; + tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354643264)))]; + tensor var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367750528)))]; + tensor linear_39_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; + tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; + tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_21_cast_fp16")]; + tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; + tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_21_cast_fp16")]; + tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; + tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; + tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; + tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; + int32 var_1235 = const()[name = string("op_1235"), val = int32(-1)]; + tensor var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367753152)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367755776)))]; + fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1253_cast_fp16 = layer_norm(axes = var_1253_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1253_cast_fp16")]; + tensor var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367758400)))]; + tensor var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371035264)))]; + tensor linear_40_cast_fp16 = linear(bias = var_1265_to_fp16, weight = var_1264_to_fp16, x = var_1253_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371037888)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1268_to_fp16, x = var_1253_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374314752)))]; + tensor var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377591616)))]; + tensor linear_42_cast_fp16 = linear(bias = var_1273_to_fp16, weight = var_1272_to_fp16, x = var_1253_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_1275_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1275_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1275_shape_cast_fp16_to_uint16 = cast(dtype = var_1275_shape_cast_fp16_to_uint16_dtype_0, x = var_1275_shape_cast_fp16)[name = string("cast_380")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1275_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_379")]; + int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; + tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; + tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; + tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")]; + tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")]; + int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; + int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1280)]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; + tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_120, end_mask = var_1291_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = concat_120, end_mask = var_1294_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1294_cast_fp16")]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 20, 64])]; + tensor var_1304_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1304_cast_fp16")]; + tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1304_cast_fp16, y = const_180_to_fp16)[name = string("q_43_cast_fp16")]; + tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 20, 64])]; + tensor var_1311_cast_fp16 = reshape(shape = concat_123x, x = var_1291_cast_fp16)[name = string("op_1311_cast_fp16")]; + tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1311_cast_fp16, y = const_181_to_fp16)[name = string("k_55_cast_fp16")]; + tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 20, 64])]; + tensor var_1318_cast_fp16 = reshape(shape = concat_124x, x = var_1294_cast_fp16)[name = string("op_1318_cast_fp16")]; + tensor var_1319 = const()[name = string("op_1319"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_278 = transpose(perm = transpose_278_perm_0, x = k_55_cast_fp16)[name = string("transpose_598")]; + tensor transpose_277 = transpose(perm = transpose_277_perm_0, x = q_43_cast_fp16)[name = string("transpose_599")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_31_cast_fp16")]; + int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; + tensor var_1322_begin_0 = const()[name = string("op_1322_begin_0"), val = tensor([0, 0])]; + tensor var_1322_end_mask_0 = const()[name = string("op_1322_end_mask_0"), val = tensor([false, true])]; + tensor var_1322_cast_fp16 = slice_by_index(begin = var_1322_begin_0, end = concat_125, end_mask = var_1322_end_mask_0, x = mask_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; + tensor var_1323_begin_0 = const()[name = string("op_1323_begin_0"), val = tensor([0, 0])]; + tensor var_1323_end_mask_0 = const()[name = string("op_1323_end_mask_0"), val = tensor([true, false])]; + tensor var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = concat_126, end_mask = var_1323_end_mask_0, x = var_1322_cast_fp16)[name = string("op_1323_cast_fp16")]; + tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1323_cast_fp16)[name = string("qk_33_cast_fp16")]; + tensor var_1326_cast_fp16 = softmax(axis = var_1235, x = qk_33_cast_fp16)[name = string("op_1326_cast_fp16")]; + bool var_1328_transpose_x_0 = const()[name = string("op_1328_transpose_x_0"), val = bool(false)]; + bool var_1328_transpose_y_0 = const()[name = string("op_1328_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1319, x = var_1318_cast_fp16)[name = string("transpose_600")]; + tensor var_1328_cast_fp16 = matmul(transpose_x = var_1328_transpose_x_0, transpose_y = var_1328_transpose_y_0, x = var_1326_cast_fp16, y = v_55_cast_fp16)[name = string("op_1328_cast_fp16")]; + tensor var_1329 = const()[name = string("op_1329"), val = tensor([0, 2, 1, 3])]; + tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 1280])]; + tensor var_1330_cast_fp16 = transpose(perm = var_1329, x = var_1328_cast_fp16)[name = string("transpose_597")]; + tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1330_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1334_to_fp16 = const()[name = string("op_1334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377594240)))]; + tensor var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380871104)))]; + tensor linear_43_cast_fp16 = linear(bias = var_1335_to_fp16, weight = var_1334_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor([-1])]; + tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873728)))]; + tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380876352)))]; + tensor var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1342_cast_fp16")]; + tensor var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380878976)))]; + tensor var_1352_to_fp16 = const()[name = string("op_1352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384155840)))]; + tensor linear_44_cast_fp16 = linear(bias = var_1352_to_fp16, weight = var_1351_to_fp16, x = var_1342_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; + tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; + tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 20, 64])]; + tensor var_1372_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1372_cast_fp16, y = const_182_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1378 = const()[name = string("op_1378"), val = tensor([1, 1500, 20, -1])]; + tensor var_1379_cast_fp16 = reshape(shape = var_1378, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1379_cast_fp16")]; + tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1379_cast_fp16, y = const_183_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1385 = const()[name = string("op_1385"), val = tensor([1, 1500, 20, -1])]; + tensor var_1386_cast_fp16 = reshape(shape = var_1385, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1386_cast_fp16")]; + tensor var_1387 = const()[name = string("op_1387"), val = tensor([0, 2, 1, 3])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_280 = transpose(perm = transpose_280_perm_0, x = k_59_cast_fp16)[name = string("transpose_594")]; + tensor transpose_279 = transpose(perm = transpose_279_perm_0, x = q_47_cast_fp16)[name = string("transpose_595")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_35_cast_fp16")]; + tensor var_1391_cast_fp16 = softmax(axis = var_1235, x = qk_35_cast_fp16)[name = string("op_1391_cast_fp16")]; + bool var_1393_transpose_x_0 = const()[name = string("op_1393_transpose_x_0"), val = bool(false)]; + bool var_1393_transpose_y_0 = const()[name = string("op_1393_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1387, x = var_1386_cast_fp16)[name = string("transpose_596")]; + tensor var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_0, transpose_y = var_1393_transpose_y_0, x = var_1391_cast_fp16, y = v_59_cast_fp16)[name = string("op_1393_cast_fp16")]; + tensor var_1394 = const()[name = string("op_1394"), val = tensor([0, 2, 1, 3])]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 1280])]; + tensor var_1395_cast_fp16 = transpose(perm = var_1394, x = var_1393_cast_fp16)[name = string("transpose_593")]; + tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1395_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384158464)))]; + tensor var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387435328)))]; + tensor linear_45_cast_fp16 = linear(bias = var_1400_to_fp16, weight = var_1399_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387437952)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387440576)))]; + tensor var_1407_cast_fp16 = layer_norm(axes = var_1407_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1407_cast_fp16")]; + tensor var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387443200)))]; + tensor var_1417_to_fp16 = const()[name = string("op_1417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400550464)))]; + tensor linear_46_cast_fp16 = linear(bias = var_1417_to_fp16, weight = var_1416_to_fp16, x = var_1407_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; + tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400560768)))]; + tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413668032)))]; + tensor linear_47_cast_fp16 = linear(bias = var_1423_to_fp16, weight = var_1422_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; + tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; + tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_25_cast_fp16")]; + tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; + tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_25_cast_fp16")]; + tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; + tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; + tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; + tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; + int32 var_1446 = const()[name = string("op_1446"), val = int32(-1)]; + tensor var_1464_axes_0 = const()[name = string("op_1464_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413670656)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413673280)))]; + fp16 var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1464_cast_fp16 = layer_norm(axes = var_1464_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1464_cast_fp16")]; + tensor var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413675904)))]; + tensor var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416952768)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = var_1464_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1479_to_fp16 = const()[name = string("op_1479_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416955392)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1479_to_fp16, x = var_1464_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420232256)))]; + tensor var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423509120)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1484_to_fp16, weight = var_1483_to_fp16, x = var_1464_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1486_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1486_shape_cast_fp16")]; + int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; + int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; + bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; + string var_1486_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1486_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; + tensor var_1486_shape_cast_fp16_to_uint16 = cast(dtype = var_1486_shape_cast_fp16_to_uint16_dtype_0, x = var_1486_shape_cast_fp16)[name = string("cast_378")]; + uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1486_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; + string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_377")]; + int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; + tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; + int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; + bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; + tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; + tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; + tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; + tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; + int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; + bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; + tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; + tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")]; + tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")]; + int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; + int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1280)]; + int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; + bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; + tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; + tensor var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_142, end_mask = var_1502_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1502_cast_fp16")]; + tensor var_1505_begin_0 = const()[name = string("op_1505_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1505_end_mask_0 = const()[name = string("op_1505_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = concat_142, end_mask = var_1505_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1505_cast_fp16")]; + tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 20, 64])]; + tensor var_1515_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1515_cast_fp16")]; + tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1515_cast_fp16, y = const_184_to_fp16)[name = string("q_51_cast_fp16")]; + tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 20, 64])]; + tensor var_1522_cast_fp16 = reshape(shape = concat_145x, x = var_1502_cast_fp16)[name = string("op_1522_cast_fp16")]; + tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_65_cast_fp16 = mul(x = var_1522_cast_fp16, y = const_185_to_fp16)[name = string("k_65_cast_fp16")]; + tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 20, 64])]; + tensor var_1529_cast_fp16 = reshape(shape = concat_146x, x = var_1505_cast_fp16)[name = string("op_1529_cast_fp16")]; + tensor var_1530 = const()[name = string("op_1530"), val = tensor([0, 2, 1, 3])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_282 = transpose(perm = transpose_282_perm_0, x = k_65_cast_fp16)[name = string("transpose_590")]; + tensor transpose_281 = transpose(perm = transpose_281_perm_0, x = q_51_cast_fp16)[name = string("transpose_591")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_37_cast_fp16")]; + int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; + tensor var_1533_begin_0 = const()[name = string("op_1533_begin_0"), val = tensor([0, 0])]; + tensor var_1533_end_mask_0 = const()[name = string("op_1533_end_mask_0"), val = tensor([false, true])]; + tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = concat_147, end_mask = var_1533_end_mask_0, x = mask_to_fp16)[name = string("op_1533_cast_fp16")]; + int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; + tensor var_1534_begin_0 = const()[name = string("op_1534_begin_0"), val = tensor([0, 0])]; + tensor var_1534_end_mask_0 = const()[name = string("op_1534_end_mask_0"), val = tensor([true, false])]; + tensor var_1534_cast_fp16 = slice_by_index(begin = var_1534_begin_0, end = concat_148, end_mask = var_1534_end_mask_0, x = var_1533_cast_fp16)[name = string("op_1534_cast_fp16")]; + tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1534_cast_fp16)[name = string("qk_39_cast_fp16")]; + tensor var_1537_cast_fp16 = softmax(axis = var_1446, x = qk_39_cast_fp16)[name = string("op_1537_cast_fp16")]; + bool var_1539_transpose_x_0 = const()[name = string("op_1539_transpose_x_0"), val = bool(false)]; + bool var_1539_transpose_y_0 = const()[name = string("op_1539_transpose_y_0"), val = bool(false)]; + tensor v_65_cast_fp16 = transpose(perm = var_1530, x = var_1529_cast_fp16)[name = string("transpose_592")]; + tensor var_1539_cast_fp16 = matmul(transpose_x = var_1539_transpose_x_0, transpose_y = var_1539_transpose_y_0, x = var_1537_cast_fp16, y = v_65_cast_fp16)[name = string("op_1539_cast_fp16")]; + tensor var_1540 = const()[name = string("op_1540"), val = tensor([0, 2, 1, 3])]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 1280])]; + tensor var_1541_cast_fp16 = transpose(perm = var_1540, x = var_1539_cast_fp16)[name = string("transpose_589")]; + tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1541_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423511744)))]; + tensor var_1546_to_fp16 = const()[name = string("op_1546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426788608)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1546_to_fp16, weight = var_1545_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_1553_axes_0 = const()[name = string("op_1553_axes_0"), val = tensor([-1])]; + tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426791232)))]; + tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426793856)))]; + tensor var_1553_cast_fp16 = layer_norm(axes = var_1553_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1553_cast_fp16")]; + tensor var_1562_to_fp16 = const()[name = string("op_1562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426796480)))]; + tensor var_1563_to_fp16 = const()[name = string("op_1563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430073344)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1563_to_fp16, weight = var_1562_to_fp16, x = var_1553_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; + tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; + tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; + tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; + tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 20, 64])]; + tensor var_1583_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1583_cast_fp16")]; + tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1583_cast_fp16, y = const_186_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1589 = const()[name = string("op_1589"), val = tensor([1, 1500, 20, -1])]; + tensor var_1590_cast_fp16 = reshape(shape = var_1589, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_69_cast_fp16 = mul(x = var_1590_cast_fp16, y = const_187_to_fp16)[name = string("k_69_cast_fp16")]; + tensor var_1596 = const()[name = string("op_1596"), val = tensor([1, 1500, 20, -1])]; + tensor var_1597_cast_fp16 = reshape(shape = var_1596, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor var_1598 = const()[name = string("op_1598"), val = tensor([0, 2, 1, 3])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_284 = transpose(perm = transpose_284_perm_0, x = k_69_cast_fp16)[name = string("transpose_586")]; + tensor transpose_283 = transpose(perm = transpose_283_perm_0, x = q_55_cast_fp16)[name = string("transpose_587")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_41_cast_fp16")]; + tensor var_1602_cast_fp16 = softmax(axis = var_1446, x = qk_41_cast_fp16)[name = string("op_1602_cast_fp16")]; + bool var_1604_transpose_x_0 = const()[name = string("op_1604_transpose_x_0"), val = bool(false)]; + bool var_1604_transpose_y_0 = const()[name = string("op_1604_transpose_y_0"), val = bool(false)]; + tensor v_69_cast_fp16 = transpose(perm = var_1598, x = var_1597_cast_fp16)[name = string("transpose_588")]; + tensor var_1604_cast_fp16 = matmul(transpose_x = var_1604_transpose_x_0, transpose_y = var_1604_transpose_y_0, x = var_1602_cast_fp16, y = v_69_cast_fp16)[name = string("op_1604_cast_fp16")]; + tensor var_1605 = const()[name = string("op_1605"), val = tensor([0, 2, 1, 3])]; + tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 1280])]; + tensor var_1606_cast_fp16 = transpose(perm = var_1605, x = var_1604_cast_fp16)[name = string("transpose_585")]; + tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1606_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430075968)))]; + tensor var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433352832)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1611_to_fp16, weight = var_1610_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433355456)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433358080)))]; + tensor var_1618_cast_fp16 = layer_norm(axes = var_1618_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1618_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433360704)))]; + tensor var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446467968)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1618_cast_fp16)[name = string("linear_54_cast_fp16")]; + string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; + tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; + tensor var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446478272)))]; + tensor var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459585536)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1634_to_fp16, weight = var_1633_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; + tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_29_cast_fp16")]; + tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; + tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_29_cast_fp16")]; + tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; + tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; + tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; + tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; + int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)]; + tensor var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459588160)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459590784)))]; + fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1675_cast_fp16 = layer_norm(axes = var_1675_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459593408)))]; + tensor var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462870272)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1687_to_fp16, weight = var_1686_to_fp16, x = var_1675_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462872896)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1690_to_fp16, x = var_1675_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466149760)))]; + tensor var_1695_to_fp16 = const()[name = string("op_1695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469426624)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1695_to_fp16, weight = var_1694_to_fp16, x = var_1675_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor var_1697_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1697_shape_cast_fp16")]; + int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; + int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; + bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; + string var_1697_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1697_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; + tensor var_1697_shape_cast_fp16_to_uint16 = cast(dtype = var_1697_shape_cast_fp16_to_uint16_dtype_0, x = var_1697_shape_cast_fp16)[name = string("cast_376")]; + uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1697_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; + string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_375")]; + int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; + tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; + tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")]; + tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")]; + int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; + int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1280)]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; + tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_164, end_mask = var_1713_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1713_cast_fp16")]; + tensor var_1716_begin_0 = const()[name = string("op_1716_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1716_end_mask_0 = const()[name = string("op_1716_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1716_cast_fp16 = slice_by_index(begin = var_1716_begin_0, end = concat_164, end_mask = var_1716_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1716_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 20, 64])]; + tensor var_1726_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1726_cast_fp16")]; + tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1726_cast_fp16, y = const_188_to_fp16)[name = string("q_59_cast_fp16")]; + tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 20, 64])]; + tensor var_1733_cast_fp16 = reshape(shape = concat_167x, x = var_1713_cast_fp16)[name = string("op_1733_cast_fp16")]; + tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_1733_cast_fp16, y = const_189_to_fp16)[name = string("k_75_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 20, 64])]; + tensor var_1740_cast_fp16 = reshape(shape = concat_168x, x = var_1716_cast_fp16)[name = string("op_1740_cast_fp16")]; + tensor var_1741 = const()[name = string("op_1741"), val = tensor([0, 2, 1, 3])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_286 = transpose(perm = transpose_286_perm_0, x = k_75_cast_fp16)[name = string("transpose_582")]; + tensor transpose_285 = transpose(perm = transpose_285_perm_0, x = q_59_cast_fp16)[name = string("transpose_583")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_43_cast_fp16")]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; + tensor var_1744_begin_0 = const()[name = string("op_1744_begin_0"), val = tensor([0, 0])]; + tensor var_1744_end_mask_0 = const()[name = string("op_1744_end_mask_0"), val = tensor([false, true])]; + tensor var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = concat_169, end_mask = var_1744_end_mask_0, x = mask_to_fp16)[name = string("op_1744_cast_fp16")]; + int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; + tensor var_1745_begin_0 = const()[name = string("op_1745_begin_0"), val = tensor([0, 0])]; + tensor var_1745_end_mask_0 = const()[name = string("op_1745_end_mask_0"), val = tensor([true, false])]; + tensor var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = concat_170, end_mask = var_1745_end_mask_0, x = var_1744_cast_fp16)[name = string("op_1745_cast_fp16")]; + tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1745_cast_fp16)[name = string("qk_45_cast_fp16")]; + tensor var_1748_cast_fp16 = softmax(axis = var_1657, x = qk_45_cast_fp16)[name = string("op_1748_cast_fp16")]; + bool var_1750_transpose_x_0 = const()[name = string("op_1750_transpose_x_0"), val = bool(false)]; + bool var_1750_transpose_y_0 = const()[name = string("op_1750_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_1741, x = var_1740_cast_fp16)[name = string("transpose_584")]; + tensor var_1750_cast_fp16 = matmul(transpose_x = var_1750_transpose_x_0, transpose_y = var_1750_transpose_y_0, x = var_1748_cast_fp16, y = v_75_cast_fp16)[name = string("op_1750_cast_fp16")]; + tensor var_1751 = const()[name = string("op_1751"), val = tensor([0, 2, 1, 3])]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 1280])]; + tensor var_1752_cast_fp16 = transpose(perm = var_1751, x = var_1750_cast_fp16)[name = string("transpose_581")]; + tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1752_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469429248)))]; + tensor var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472706112)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1757_to_fp16, weight = var_1756_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor([-1])]; + tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472708736)))]; + tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472711360)))]; + tensor var_1764_cast_fp16 = layer_norm(axes = var_1764_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor var_1773_to_fp16 = const()[name = string("op_1773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472713984)))]; + tensor var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475990848)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1774_to_fp16, weight = var_1773_to_fp16, x = var_1764_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; + tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; + tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 20, 64])]; + tensor var_1794_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")]; + tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1794_cast_fp16, y = const_190_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 1500, 20, -1])]; + tensor var_1801_cast_fp16 = reshape(shape = var_1800, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1801_cast_fp16")]; + tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_1801_cast_fp16, y = const_191_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_1807 = const()[name = string("op_1807"), val = tensor([1, 1500, 20, -1])]; + tensor var_1808_cast_fp16 = reshape(shape = var_1807, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1808_cast_fp16")]; + tensor var_1809 = const()[name = string("op_1809"), val = tensor([0, 2, 1, 3])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_288 = transpose(perm = transpose_288_perm_0, x = k_79_cast_fp16)[name = string("transpose_578")]; + tensor transpose_287 = transpose(perm = transpose_287_perm_0, x = q_63_cast_fp16)[name = string("transpose_579")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_47_cast_fp16")]; + tensor var_1813_cast_fp16 = softmax(axis = var_1657, x = qk_47_cast_fp16)[name = string("op_1813_cast_fp16")]; + bool var_1815_transpose_x_0 = const()[name = string("op_1815_transpose_x_0"), val = bool(false)]; + bool var_1815_transpose_y_0 = const()[name = string("op_1815_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_1809, x = var_1808_cast_fp16)[name = string("transpose_580")]; + tensor var_1815_cast_fp16 = matmul(transpose_x = var_1815_transpose_x_0, transpose_y = var_1815_transpose_y_0, x = var_1813_cast_fp16, y = v_79_cast_fp16)[name = string("op_1815_cast_fp16")]; + tensor var_1816 = const()[name = string("op_1816"), val = tensor([0, 2, 1, 3])]; + tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 1280])]; + tensor var_1817_cast_fp16 = transpose(perm = var_1816, x = var_1815_cast_fp16)[name = string("transpose_577")]; + tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1817_cast_fp16)[name = string("x_139_cast_fp16")]; + tensor var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475993472)))]; + tensor var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479270336)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1822_to_fp16, weight = var_1821_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; + tensor var_1829_axes_0 = const()[name = string("op_1829_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479272960)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479275584)))]; + tensor var_1829_cast_fp16 = layer_norm(axes = var_1829_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1829_cast_fp16")]; + tensor var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479278208)))]; + tensor var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492385472)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1839_to_fp16, weight = var_1838_to_fp16, x = var_1829_cast_fp16)[name = string("linear_62_cast_fp16")]; + string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; + tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1844_to_fp16 = const()[name = string("op_1844_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492395776)))]; + tensor var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505503040)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1845_to_fp16, weight = var_1844_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; + tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_33_cast_fp16")]; + tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; + tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_33_cast_fp16")]; + tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; + tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; + tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; + tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; + int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)]; + tensor var_1886_axes_0 = const()[name = string("op_1886_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505505664)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505508288)))]; + fp16 var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1886_cast_fp16 = layer_norm(axes = var_1886_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505510912)))]; + tensor var_1898_to_fp16 = const()[name = string("op_1898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508787776)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1898_to_fp16, weight = var_1897_to_fp16, x = var_1886_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508790400)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1901_to_fp16, x = var_1886_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512067264)))]; + tensor var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515344128)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1906_to_fp16, weight = var_1905_to_fp16, x = var_1886_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1908_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1908_shape_cast_fp16")]; + int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; + int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; + bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; + string var_1908_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1908_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; + tensor var_1908_shape_cast_fp16_to_uint16 = cast(dtype = var_1908_shape_cast_fp16_to_uint16_dtype_0, x = var_1908_shape_cast_fp16)[name = string("cast_374")]; + uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1908_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; + string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_373")]; + int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; + tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; + tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; + tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; + tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; + int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; + bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; + tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; + tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")]; + tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")]; + int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; + int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1280)]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; + tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_186, end_mask = var_1924_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1924_cast_fp16")]; + tensor var_1927_begin_0 = const()[name = string("op_1927_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1927_end_mask_0 = const()[name = string("op_1927_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1927_cast_fp16 = slice_by_index(begin = var_1927_begin_0, end = concat_186, end_mask = var_1927_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1927_cast_fp16")]; + tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 20, 64])]; + tensor var_1937_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1937_cast_fp16")]; + tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1937_cast_fp16, y = const_192_to_fp16)[name = string("q_67_cast_fp16")]; + tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 20, 64])]; + tensor var_1944_cast_fp16 = reshape(shape = concat_189x, x = var_1924_cast_fp16)[name = string("op_1944_cast_fp16")]; + tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_85_cast_fp16 = mul(x = var_1944_cast_fp16, y = const_193_to_fp16)[name = string("k_85_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 20, 64])]; + tensor var_1951_cast_fp16 = reshape(shape = concat_190x, x = var_1927_cast_fp16)[name = string("op_1951_cast_fp16")]; + tensor var_1952 = const()[name = string("op_1952"), val = tensor([0, 2, 1, 3])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_289_perm_0 = const()[name = string("transpose_289_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_290_perm_0 = const()[name = string("transpose_290_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_290 = transpose(perm = transpose_290_perm_0, x = k_85_cast_fp16)[name = string("transpose_574")]; + tensor transpose_289 = transpose(perm = transpose_289_perm_0, x = q_67_cast_fp16)[name = string("transpose_575")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_289, y = transpose_290)[name = string("qk_49_cast_fp16")]; + int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; + int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; + bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; + tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; + tensor var_1955_begin_0 = const()[name = string("op_1955_begin_0"), val = tensor([0, 0])]; + tensor var_1955_end_mask_0 = const()[name = string("op_1955_end_mask_0"), val = tensor([false, true])]; + tensor var_1955_cast_fp16 = slice_by_index(begin = var_1955_begin_0, end = concat_191, end_mask = var_1955_end_mask_0, x = mask_to_fp16)[name = string("op_1955_cast_fp16")]; + int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; + tensor var_1956_begin_0 = const()[name = string("op_1956_begin_0"), val = tensor([0, 0])]; + tensor var_1956_end_mask_0 = const()[name = string("op_1956_end_mask_0"), val = tensor([true, false])]; + tensor var_1956_cast_fp16 = slice_by_index(begin = var_1956_begin_0, end = concat_192, end_mask = var_1956_end_mask_0, x = var_1955_cast_fp16)[name = string("op_1956_cast_fp16")]; + tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1956_cast_fp16)[name = string("qk_51_cast_fp16")]; + tensor var_1959_cast_fp16 = softmax(axis = var_1868, x = qk_51_cast_fp16)[name = string("op_1959_cast_fp16")]; + bool var_1961_transpose_x_0 = const()[name = string("op_1961_transpose_x_0"), val = bool(false)]; + bool var_1961_transpose_y_0 = const()[name = string("op_1961_transpose_y_0"), val = bool(false)]; + tensor v_85_cast_fp16 = transpose(perm = var_1952, x = var_1951_cast_fp16)[name = string("transpose_576")]; + tensor var_1961_cast_fp16 = matmul(transpose_x = var_1961_transpose_x_0, transpose_y = var_1961_transpose_y_0, x = var_1959_cast_fp16, y = v_85_cast_fp16)[name = string("op_1961_cast_fp16")]; + tensor var_1962 = const()[name = string("op_1962"), val = tensor([0, 2, 1, 3])]; + tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 1280])]; + tensor var_1963_cast_fp16 = transpose(perm = var_1962, x = var_1961_cast_fp16)[name = string("transpose_573")]; + tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1963_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515346752)))]; + tensor var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518623616)))]; + tensor linear_67_cast_fp16 = linear(bias = var_1968_to_fp16, weight = var_1967_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor([-1])]; + tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518626240)))]; + tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518628864)))]; + tensor var_1975_cast_fp16 = layer_norm(axes = var_1975_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1975_cast_fp16")]; + tensor var_1984_to_fp16 = const()[name = string("op_1984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518631488)))]; + tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521908352)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1985_to_fp16, weight = var_1984_to_fp16, x = var_1975_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; + tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; + tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; + tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 20, 64])]; + tensor var_2005_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_2005_cast_fp16")]; + tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_2005_cast_fp16, y = const_194_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_2011 = const()[name = string("op_2011"), val = tensor([1, 1500, 20, -1])]; + tensor var_2012_cast_fp16 = reshape(shape = var_2011, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2012_cast_fp16")]; + tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_89_cast_fp16 = mul(x = var_2012_cast_fp16, y = const_195_to_fp16)[name = string("k_89_cast_fp16")]; + tensor var_2018 = const()[name = string("op_2018"), val = tensor([1, 1500, 20, -1])]; + tensor var_2019_cast_fp16 = reshape(shape = var_2018, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2019_cast_fp16")]; + tensor var_2020 = const()[name = string("op_2020"), val = tensor([0, 2, 1, 3])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_291_perm_0 = const()[name = string("transpose_291_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_292_perm_0 = const()[name = string("transpose_292_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_292 = transpose(perm = transpose_292_perm_0, x = k_89_cast_fp16)[name = string("transpose_570")]; + tensor transpose_291 = transpose(perm = transpose_291_perm_0, x = q_71_cast_fp16)[name = string("transpose_571")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_291, y = transpose_292)[name = string("qk_53_cast_fp16")]; + tensor var_2024_cast_fp16 = softmax(axis = var_1868, x = qk_53_cast_fp16)[name = string("op_2024_cast_fp16")]; + bool var_2026_transpose_x_0 = const()[name = string("op_2026_transpose_x_0"), val = bool(false)]; + bool var_2026_transpose_y_0 = const()[name = string("op_2026_transpose_y_0"), val = bool(false)]; + tensor v_89_cast_fp16 = transpose(perm = var_2020, x = var_2019_cast_fp16)[name = string("transpose_572")]; + tensor var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_0, transpose_y = var_2026_transpose_y_0, x = var_2024_cast_fp16, y = v_89_cast_fp16)[name = string("op_2026_cast_fp16")]; + tensor var_2027 = const()[name = string("op_2027"), val = tensor([0, 2, 1, 3])]; + tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 1280])]; + tensor var_2028_cast_fp16 = transpose(perm = var_2027, x = var_2026_cast_fp16)[name = string("transpose_569")]; + tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_2028_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521910976)))]; + tensor var_2033_to_fp16 = const()[name = string("op_2033_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525187840)))]; + tensor linear_69_cast_fp16 = linear(bias = var_2033_to_fp16, weight = var_2032_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525190464)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525193088)))]; + tensor var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2040_cast_fp16")]; + tensor var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525195712)))]; + tensor var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538302976)))]; + tensor linear_70_cast_fp16 = linear(bias = var_2050_to_fp16, weight = var_2049_to_fp16, x = var_2040_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; + tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_2055_to_fp16 = const()[name = string("op_2055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538313280)))]; + tensor var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551420544)))]; + tensor linear_71_cast_fp16 = linear(bias = var_2056_to_fp16, weight = var_2055_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; + tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_37_cast_fp16")]; + tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; + tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_37_cast_fp16")]; + tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; + tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; + tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; + tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; + int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)]; + tensor var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551423168)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551425792)))]; + fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2097_cast_fp16 = layer_norm(axes = var_2097_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2097_cast_fp16")]; + tensor var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551428416)))]; + tensor var_2109_to_fp16 = const()[name = string("op_2109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554705280)))]; + tensor linear_72_cast_fp16 = linear(bias = var_2109_to_fp16, weight = var_2108_to_fp16, x = var_2097_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554707904)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2112_to_fp16, x = var_2097_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557984768)))]; + tensor var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561261632)))]; + tensor linear_74_cast_fp16 = linear(bias = var_2117_to_fp16, weight = var_2116_to_fp16, x = var_2097_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2119_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2119_shape_cast_fp16")]; + int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; + int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; + bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; + string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; + tensor var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_372")]; + uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; + string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_371")]; + int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; + tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; + tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; + tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")]; + tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")]; + int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; + int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1280)]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; + tensor var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_208, end_mask = var_2135_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2135_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = string("op_2138_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2138_end_mask_0 = const()[name = string("op_2138_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = concat_208, end_mask = var_2138_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2138_cast_fp16")]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 20, 64])]; + tensor var_2148_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2148_cast_fp16")]; + tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2148_cast_fp16, y = const_196_to_fp16)[name = string("q_75_cast_fp16")]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 20, 64])]; + tensor var_2155_cast_fp16 = reshape(shape = concat_211x, x = var_2135_cast_fp16)[name = string("op_2155_cast_fp16")]; + tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2155_cast_fp16, y = const_197_to_fp16)[name = string("k_95_cast_fp16")]; + tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 20, 64])]; + tensor var_2162_cast_fp16 = reshape(shape = concat_212x, x = var_2138_cast_fp16)[name = string("op_2162_cast_fp16")]; + tensor var_2163 = const()[name = string("op_2163"), val = tensor([0, 2, 1, 3])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_293_perm_0 = const()[name = string("transpose_293_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_294_perm_0 = const()[name = string("transpose_294_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_294 = transpose(perm = transpose_294_perm_0, x = k_95_cast_fp16)[name = string("transpose_566")]; + tensor transpose_293 = transpose(perm = transpose_293_perm_0, x = q_75_cast_fp16)[name = string("transpose_567")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_293, y = transpose_294)[name = string("qk_55_cast_fp16")]; + int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; + int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; + bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; + tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; + tensor var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor([0, 0])]; + tensor var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor([false, true])]; + tensor var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = concat_213, end_mask = var_2166_end_mask_0, x = mask_to_fp16)[name = string("op_2166_cast_fp16")]; + int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; + tensor var_2167_begin_0 = const()[name = string("op_2167_begin_0"), val = tensor([0, 0])]; + tensor var_2167_end_mask_0 = const()[name = string("op_2167_end_mask_0"), val = tensor([true, false])]; + tensor var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = concat_214, end_mask = var_2167_end_mask_0, x = var_2166_cast_fp16)[name = string("op_2167_cast_fp16")]; + tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2167_cast_fp16)[name = string("qk_57_cast_fp16")]; + tensor var_2170_cast_fp16 = softmax(axis = var_2079, x = qk_57_cast_fp16)[name = string("op_2170_cast_fp16")]; + bool var_2172_transpose_x_0 = const()[name = string("op_2172_transpose_x_0"), val = bool(false)]; + bool var_2172_transpose_y_0 = const()[name = string("op_2172_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2163, x = var_2162_cast_fp16)[name = string("transpose_568")]; + tensor var_2172_cast_fp16 = matmul(transpose_x = var_2172_transpose_x_0, transpose_y = var_2172_transpose_y_0, x = var_2170_cast_fp16, y = v_95_cast_fp16)[name = string("op_2172_cast_fp16")]; + tensor var_2173 = const()[name = string("op_2173"), val = tensor([0, 2, 1, 3])]; + tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 1280])]; + tensor var_2174_cast_fp16 = transpose(perm = var_2173, x = var_2172_cast_fp16)[name = string("transpose_565")]; + tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2174_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561264256)))]; + tensor var_2179_to_fp16 = const()[name = string("op_2179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564541120)))]; + tensor linear_75_cast_fp16 = linear(bias = var_2179_to_fp16, weight = var_2178_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; + tensor var_2186_axes_0 = const()[name = string("op_2186_axes_0"), val = tensor([-1])]; + tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564543744)))]; + tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564546368)))]; + tensor var_2186_cast_fp16 = layer_norm(axes = var_2186_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2186_cast_fp16")]; + tensor var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564548992)))]; + tensor var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567825856)))]; + tensor linear_76_cast_fp16 = linear(bias = var_2196_to_fp16, weight = var_2195_to_fp16, x = var_2186_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; + tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; + tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; + tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; + tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 20, 64])]; + tensor var_2216_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2216_cast_fp16")]; + tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2216_cast_fp16, y = const_198_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2222 = const()[name = string("op_2222"), val = tensor([1, 1500, 20, -1])]; + tensor var_2223_cast_fp16 = reshape(shape = var_2222, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2223_cast_fp16")]; + tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2223_cast_fp16, y = const_199_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2229 = const()[name = string("op_2229"), val = tensor([1, 1500, 20, -1])]; + tensor var_2230_cast_fp16 = reshape(shape = var_2229, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2230_cast_fp16")]; + tensor var_2231 = const()[name = string("op_2231"), val = tensor([0, 2, 1, 3])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_295_perm_0 = const()[name = string("transpose_295_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_296_perm_0 = const()[name = string("transpose_296_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_296 = transpose(perm = transpose_296_perm_0, x = k_99_cast_fp16)[name = string("transpose_562")]; + tensor transpose_295 = transpose(perm = transpose_295_perm_0, x = q_79_cast_fp16)[name = string("transpose_563")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_295, y = transpose_296)[name = string("qk_59_cast_fp16")]; + tensor var_2235_cast_fp16 = softmax(axis = var_2079, x = qk_59_cast_fp16)[name = string("op_2235_cast_fp16")]; + bool var_2237_transpose_x_0 = const()[name = string("op_2237_transpose_x_0"), val = bool(false)]; + bool var_2237_transpose_y_0 = const()[name = string("op_2237_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2231, x = var_2230_cast_fp16)[name = string("transpose_564")]; + tensor var_2237_cast_fp16 = matmul(transpose_x = var_2237_transpose_x_0, transpose_y = var_2237_transpose_y_0, x = var_2235_cast_fp16, y = v_99_cast_fp16)[name = string("op_2237_cast_fp16")]; + tensor var_2238 = const()[name = string("op_2238"), val = tensor([0, 2, 1, 3])]; + tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 1280])]; + tensor var_2239_cast_fp16 = transpose(perm = var_2238, x = var_2237_cast_fp16)[name = string("transpose_561")]; + tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2239_cast_fp16)[name = string("x_175_cast_fp16")]; + tensor var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567828480)))]; + tensor var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571105344)))]; + tensor linear_77_cast_fp16 = linear(bias = var_2244_to_fp16, weight = var_2243_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; + tensor var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107968)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571110592)))]; + tensor var_2251_cast_fp16 = layer_norm(axes = var_2251_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2251_cast_fp16")]; + tensor var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571113216)))]; + tensor var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584220480)))]; + tensor linear_78_cast_fp16 = linear(bias = var_2261_to_fp16, weight = var_2260_to_fp16, x = var_2251_cast_fp16)[name = string("linear_78_cast_fp16")]; + string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; + tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_2266_to_fp16 = const()[name = string("op_2266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584230784)))]; + tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597338048)))]; + tensor linear_79_cast_fp16 = linear(bias = var_2267_to_fp16, weight = var_2266_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; + tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; + tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_41_cast_fp16")]; + tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; + tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_41_cast_fp16")]; + tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; + tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; + tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; + tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; + int32 var_2290 = const()[name = string("op_2290"), val = int32(-1)]; + tensor var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597340672)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597343296)))]; + fp16 var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2308_cast_fp16")]; + tensor var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597345920)))]; + tensor var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600622784)))]; + tensor linear_80_cast_fp16 = linear(bias = var_2320_to_fp16, weight = var_2319_to_fp16, x = var_2308_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600625408)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2323_to_fp16, x = var_2308_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603902272)))]; + tensor var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607179136)))]; + tensor linear_82_cast_fp16 = linear(bias = var_2328_to_fp16, weight = var_2327_to_fp16, x = var_2308_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor var_2330_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2330_shape_cast_fp16")]; + int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; + int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; + bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; + string var_2330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; + tensor var_2330_shape_cast_fp16_to_uint16 = cast(dtype = var_2330_shape_cast_fp16_to_uint16_dtype_0, x = var_2330_shape_cast_fp16)[name = string("cast_370")]; + uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2330_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; + string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_369")]; + int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; + tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; + tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; + tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; + tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; + int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; + bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; + tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; + tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")]; + tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")]; + int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; + int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1280)]; + int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; + bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; + tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; + tensor var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_230, end_mask = var_2346_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2346_cast_fp16")]; + tensor var_2349_begin_0 = const()[name = string("op_2349_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2349_end_mask_0 = const()[name = string("op_2349_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = concat_230, end_mask = var_2349_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2349_cast_fp16")]; + tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 20, 64])]; + tensor var_2359_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2359_cast_fp16")]; + tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2359_cast_fp16, y = const_200_to_fp16)[name = string("q_83_cast_fp16")]; + tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 20, 64])]; + tensor var_2366_cast_fp16 = reshape(shape = concat_233x, x = var_2346_cast_fp16)[name = string("op_2366_cast_fp16")]; + tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_105_cast_fp16 = mul(x = var_2366_cast_fp16, y = const_201_to_fp16)[name = string("k_105_cast_fp16")]; + tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 20, 64])]; + tensor var_2373_cast_fp16 = reshape(shape = concat_234x, x = var_2349_cast_fp16)[name = string("op_2373_cast_fp16")]; + tensor var_2374 = const()[name = string("op_2374"), val = tensor([0, 2, 1, 3])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_297_perm_0 = const()[name = string("transpose_297_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_298_perm_0 = const()[name = string("transpose_298_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_298 = transpose(perm = transpose_298_perm_0, x = k_105_cast_fp16)[name = string("transpose_558")]; + tensor transpose_297 = transpose(perm = transpose_297_perm_0, x = q_83_cast_fp16)[name = string("transpose_559")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_297, y = transpose_298)[name = string("qk_61_cast_fp16")]; + int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; + int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; + bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; + tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; + tensor var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor([0, 0])]; + tensor var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor([false, true])]; + tensor var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = concat_235, end_mask = var_2377_end_mask_0, x = mask_to_fp16)[name = string("op_2377_cast_fp16")]; + int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; + tensor var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor([0, 0])]; + tensor var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor([true, false])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = concat_236, end_mask = var_2378_end_mask_0, x = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")]; + tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2378_cast_fp16)[name = string("qk_63_cast_fp16")]; + tensor var_2381_cast_fp16 = softmax(axis = var_2290, x = qk_63_cast_fp16)[name = string("op_2381_cast_fp16")]; + bool var_2383_transpose_x_0 = const()[name = string("op_2383_transpose_x_0"), val = bool(false)]; + bool var_2383_transpose_y_0 = const()[name = string("op_2383_transpose_y_0"), val = bool(false)]; + tensor v_105_cast_fp16 = transpose(perm = var_2374, x = var_2373_cast_fp16)[name = string("transpose_560")]; + tensor var_2383_cast_fp16 = matmul(transpose_x = var_2383_transpose_x_0, transpose_y = var_2383_transpose_y_0, x = var_2381_cast_fp16, y = v_105_cast_fp16)[name = string("op_2383_cast_fp16")]; + tensor var_2384 = const()[name = string("op_2384"), val = tensor([0, 2, 1, 3])]; + tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 1280])]; + tensor var_2385_cast_fp16 = transpose(perm = var_2384, x = var_2383_cast_fp16)[name = string("transpose_557")]; + tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2385_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607181760)))]; + tensor var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610458624)))]; + tensor linear_83_cast_fp16 = linear(bias = var_2390_to_fp16, weight = var_2389_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; + tensor var_2397_axes_0 = const()[name = string("op_2397_axes_0"), val = tensor([-1])]; + tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610461248)))]; + tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610463872)))]; + tensor var_2397_cast_fp16 = layer_norm(axes = var_2397_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2397_cast_fp16")]; + tensor var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610466496)))]; + tensor var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613743360)))]; + tensor linear_84_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = var_2397_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; + tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; + tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; + tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; + tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 20, 64])]; + tensor var_2427_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2427_cast_fp16")]; + tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2427_cast_fp16, y = const_202_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2433 = const()[name = string("op_2433"), val = tensor([1, 1500, 20, -1])]; + tensor var_2434_cast_fp16 = reshape(shape = var_2433, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2434_cast_fp16")]; + tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_109_cast_fp16 = mul(x = var_2434_cast_fp16, y = const_203_to_fp16)[name = string("k_109_cast_fp16")]; + tensor var_2440 = const()[name = string("op_2440"), val = tensor([1, 1500, 20, -1])]; + tensor var_2441_cast_fp16 = reshape(shape = var_2440, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2441_cast_fp16")]; + tensor var_2442 = const()[name = string("op_2442"), val = tensor([0, 2, 1, 3])]; + bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; + bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; + tensor transpose_299_perm_0 = const()[name = string("transpose_299_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_300_perm_0 = const()[name = string("transpose_300_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_300 = transpose(perm = transpose_300_perm_0, x = k_109_cast_fp16)[name = string("transpose_554")]; + tensor transpose_299 = transpose(perm = transpose_299_perm_0, x = q_87_cast_fp16)[name = string("transpose_555")]; + tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_299, y = transpose_300)[name = string("qk_65_cast_fp16")]; + tensor var_2446_cast_fp16 = softmax(axis = var_2290, x = qk_65_cast_fp16)[name = string("op_2446_cast_fp16")]; + bool var_2448_transpose_x_0 = const()[name = string("op_2448_transpose_x_0"), val = bool(false)]; + bool var_2448_transpose_y_0 = const()[name = string("op_2448_transpose_y_0"), val = bool(false)]; + tensor v_109_cast_fp16 = transpose(perm = var_2442, x = var_2441_cast_fp16)[name = string("transpose_556")]; + tensor var_2448_cast_fp16 = matmul(transpose_x = var_2448_transpose_x_0, transpose_y = var_2448_transpose_y_0, x = var_2446_cast_fp16, y = v_109_cast_fp16)[name = string("op_2448_cast_fp16")]; + tensor var_2449 = const()[name = string("op_2449"), val = tensor([0, 2, 1, 3])]; + tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 1280])]; + tensor var_2450_cast_fp16 = transpose(perm = var_2449, x = var_2448_cast_fp16)[name = string("transpose_553")]; + tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2450_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613745984)))]; + tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617022848)))]; + tensor linear_85_cast_fp16 = linear(bias = var_2455_to_fp16, weight = var_2454_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617025472)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617028096)))]; + tensor var_2462_cast_fp16 = layer_norm(axes = var_2462_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2462_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617030720)))]; + tensor var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630137984)))]; + tensor linear_86_cast_fp16 = linear(bias = var_2472_to_fp16, weight = var_2471_to_fp16, x = var_2462_cast_fp16)[name = string("linear_86_cast_fp16")]; + string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; + tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; + tensor var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630148288)))]; + tensor var_2478_to_fp16 = const()[name = string("op_2478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643255552)))]; + tensor linear_87_cast_fp16 = linear(bias = var_2478_to_fp16, weight = var_2477_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; + tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_45_cast_fp16")]; + tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; + tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_45_cast_fp16")]; + tensor k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; + tensor k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")]; + tensor v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; + tensor v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")]; + int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)]; + tensor var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643258176)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643260800)))]; + fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2519_cast_fp16 = layer_norm(axes = var_2519_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2519_cast_fp16")]; + tensor var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643263424)))]; + tensor var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646540288)))]; + tensor linear_88_cast_fp16 = linear(bias = var_2531_to_fp16, weight = var_2530_to_fp16, x = var_2519_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2534_to_fp16 = const()[name = string("op_2534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646542912)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2534_to_fp16, x = var_2519_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649819776)))]; + tensor var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653096640)))]; + tensor linear_90_cast_fp16 = linear(bias = var_2539_to_fp16, weight = var_2538_to_fp16, x = var_2519_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_2541_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2541_shape_cast_fp16")]; + int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; + int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; + bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; + string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; + tensor var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_368")]; + uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; + string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_367")]; + int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")]; + tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")]; + tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; + tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; + tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; + tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; + int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; + bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; + tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; + tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")]; + tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")]; + int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; + int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1280)]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")]; + tensor var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_252, end_mask = var_2557_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2557_cast_fp16")]; + tensor var_2560_begin_0 = const()[name = string("op_2560_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2560_end_mask_0 = const()[name = string("op_2560_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2560_cast_fp16 = slice_by_index(begin = var_2560_begin_0, end = concat_252, end_mask = var_2560_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2560_cast_fp16")]; + tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 20, 64])]; + tensor var_2570_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2570_cast_fp16")]; + tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2570_cast_fp16, y = const_204_to_fp16)[name = string("q_91_cast_fp16")]; + tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 20, 64])]; + tensor var_2577_cast_fp16 = reshape(shape = concat_255x, x = var_2557_cast_fp16)[name = string("op_2577_cast_fp16")]; + tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_2577_cast_fp16, y = const_205_to_fp16)[name = string("k_115_cast_fp16")]; + tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 20, 64])]; + tensor var_2584_cast_fp16 = reshape(shape = concat_256x, x = var_2560_cast_fp16)[name = string("op_2584_cast_fp16")]; + tensor var_2585 = const()[name = string("op_2585"), val = tensor([0, 2, 1, 3])]; + bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; + bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; + tensor transpose_301_perm_0 = const()[name = string("transpose_301_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_302_perm_0 = const()[name = string("transpose_302_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_302 = transpose(perm = transpose_302_perm_0, x = k_115_cast_fp16)[name = string("transpose_550")]; + tensor transpose_301 = transpose(perm = transpose_301_perm_0, x = q_91_cast_fp16)[name = string("transpose_551")]; + tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_301, y = transpose_302)[name = string("qk_67_cast_fp16")]; + int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; + int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; + bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; + tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; + tensor var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor([0, 0])]; + tensor var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor([false, true])]; + tensor var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = concat_257, end_mask = var_2588_end_mask_0, x = mask_to_fp16)[name = string("op_2588_cast_fp16")]; + int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; + tensor var_2589_begin_0 = const()[name = string("op_2589_begin_0"), val = tensor([0, 0])]; + tensor var_2589_end_mask_0 = const()[name = string("op_2589_end_mask_0"), val = tensor([true, false])]; + tensor var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = concat_258, end_mask = var_2589_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2589_cast_fp16")]; + tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2589_cast_fp16)[name = string("qk_69_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_2501, x = qk_69_cast_fp16)[name = string("op_2592_cast_fp16")]; + bool var_2594_transpose_x_0 = const()[name = string("op_2594_transpose_x_0"), val = bool(false)]; + bool var_2594_transpose_y_0 = const()[name = string("op_2594_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_2585, x = var_2584_cast_fp16)[name = string("transpose_552")]; + tensor var_2594_cast_fp16 = matmul(transpose_x = var_2594_transpose_x_0, transpose_y = var_2594_transpose_y_0, x = var_2592_cast_fp16, y = v_115_cast_fp16)[name = string("op_2594_cast_fp16")]; + tensor var_2595 = const()[name = string("op_2595"), val = tensor([0, 2, 1, 3])]; + tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 1280])]; + tensor var_2596_cast_fp16 = transpose(perm = var_2595, x = var_2594_cast_fp16)[name = string("transpose_549")]; + tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2596_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_2600_to_fp16 = const()[name = string("op_2600_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653099264)))]; + tensor var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656376128)))]; + tensor linear_91_cast_fp16 = linear(bias = var_2601_to_fp16, weight = var_2600_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_2608_axes_0 = const()[name = string("op_2608_axes_0"), val = tensor([-1])]; + tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656378752)))]; + tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656381376)))]; + tensor var_2608_cast_fp16 = layer_norm(axes = var_2608_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2608_cast_fp16")]; + tensor var_2617_to_fp16 = const()[name = string("op_2617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656384000)))]; + tensor var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659660864)))]; + tensor linear_92_cast_fp16 = linear(bias = var_2618_to_fp16, weight = var_2617_to_fp16, x = var_2608_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; + tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; + tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; + tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; + tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 20, 64])]; + tensor var_2638_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2638_cast_fp16")]; + tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_95_cast_fp16 = mul(x = var_2638_cast_fp16, y = const_206_to_fp16)[name = string("q_95_cast_fp16")]; + tensor var_2644 = const()[name = string("op_2644"), val = tensor([1, 1500, 20, -1])]; + tensor var_2645_cast_fp16 = reshape(shape = var_2644, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2645_cast_fp16")]; + tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_119_cast_fp16 = mul(x = var_2645_cast_fp16, y = const_207_to_fp16)[name = string("k_119_cast_fp16")]; + tensor var_2651 = const()[name = string("op_2651"), val = tensor([1, 1500, 20, -1])]; + tensor var_2652_cast_fp16 = reshape(shape = var_2651, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2652_cast_fp16")]; + tensor var_2653 = const()[name = string("op_2653"), val = tensor([0, 2, 1, 3])]; + bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)]; + bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)]; + tensor transpose_303_perm_0 = const()[name = string("transpose_303_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_304_perm_0 = const()[name = string("transpose_304_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_304 = transpose(perm = transpose_304_perm_0, x = k_119_cast_fp16)[name = string("transpose_546")]; + tensor transpose_303 = transpose(perm = transpose_303_perm_0, x = q_95_cast_fp16)[name = string("transpose_547")]; + tensor qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_303, y = transpose_304)[name = string("qk_71_cast_fp16")]; + tensor var_2657_cast_fp16 = softmax(axis = var_2501, x = qk_71_cast_fp16)[name = string("op_2657_cast_fp16")]; + bool var_2659_transpose_x_0 = const()[name = string("op_2659_transpose_x_0"), val = bool(false)]; + bool var_2659_transpose_y_0 = const()[name = string("op_2659_transpose_y_0"), val = bool(false)]; + tensor v_119_cast_fp16 = transpose(perm = var_2653, x = var_2652_cast_fp16)[name = string("transpose_548")]; + tensor var_2659_cast_fp16 = matmul(transpose_x = var_2659_transpose_x_0, transpose_y = var_2659_transpose_y_0, x = var_2657_cast_fp16, y = v_119_cast_fp16)[name = string("op_2659_cast_fp16")]; + tensor var_2660 = const()[name = string("op_2660"), val = tensor([0, 2, 1, 3])]; + tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 1280])]; + tensor var_2661_cast_fp16 = transpose(perm = var_2660, x = var_2659_cast_fp16)[name = string("transpose_545")]; + tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2661_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659663488)))]; + tensor var_2666_to_fp16 = const()[name = string("op_2666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662940352)))]; + tensor linear_93_cast_fp16 = linear(bias = var_2666_to_fp16, weight = var_2665_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_2673_axes_0 = const()[name = string("op_2673_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662942976)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662945600)))]; + tensor var_2673_cast_fp16 = layer_norm(axes = var_2673_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2673_cast_fp16")]; + tensor var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662948224)))]; + tensor var_2683_to_fp16 = const()[name = string("op_2683_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676055488)))]; + tensor linear_94_cast_fp16 = linear(bias = var_2683_to_fp16, weight = var_2682_to_fp16, x = var_2673_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; + tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676065792)))]; + tensor var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689173056)))]; + tensor linear_95_cast_fp16 = linear(bias = var_2689_to_fp16, weight = var_2688_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; + tensor k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_49_cast_fp16")]; + tensor v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; + tensor v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_49_cast_fp16")]; + tensor k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; + tensor k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")]; + tensor v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; + tensor v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")]; + int32 var_2712 = const()[name = string("op_2712"), val = int32(-1)]; + tensor var_2730_axes_0 = const()[name = string("op_2730_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689175680)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689178304)))]; + fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2730_cast_fp16 = layer_norm(axes = var_2730_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2730_cast_fp16")]; + tensor var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689180928)))]; + tensor var_2742_to_fp16 = const()[name = string("op_2742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692457792)))]; + tensor linear_96_cast_fp16 = linear(bias = var_2742_to_fp16, weight = var_2741_to_fp16, x = var_2730_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692460416)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2745_to_fp16, x = var_2730_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_2749_to_fp16 = const()[name = string("op_2749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695737280)))]; + tensor var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699014144)))]; + tensor linear_98_cast_fp16 = linear(bias = var_2750_to_fp16, weight = var_2749_to_fp16, x = var_2730_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_2752_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2752_shape_cast_fp16")]; + int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)]; + int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)]; + bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)]; + string var_2752_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2752_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)]; + tensor var_2752_shape_cast_fp16_to_uint16 = cast(dtype = var_2752_shape_cast_fp16_to_uint16_dtype_0, x = var_2752_shape_cast_fp16)[name = string("cast_366")]; + uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2752_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")]; + string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_365")]; + int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([0])]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")]; + tensor concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor([12])]; + int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; + bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; + tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")]; + tensor concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor([0])]; + tensor concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor([0])]; + tensor concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor([0])]; + int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; + bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; + tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")]; + tensor k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")]; + tensor v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")]; + int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)]; + int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1280)]; + int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)]; + bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)]; + tensor concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")]; + tensor var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_274, end_mask = var_2768_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2768_cast_fp16")]; + tensor var_2771_begin_0 = const()[name = string("op_2771_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2771_end_mask_0 = const()[name = string("op_2771_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = concat_274, end_mask = var_2771_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2771_cast_fp16")]; + tensor concat_276x = const()[name = string("concat_276x"), val = tensor([1, -1, 20, 64])]; + tensor var_2781_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2781_cast_fp16")]; + tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_99_cast_fp16 = mul(x = var_2781_cast_fp16, y = const_208_to_fp16)[name = string("q_99_cast_fp16")]; + tensor concat_277x = const()[name = string("concat_277x"), val = tensor([1, -1, 20, 64])]; + tensor var_2788_cast_fp16 = reshape(shape = concat_277x, x = var_2768_cast_fp16)[name = string("op_2788_cast_fp16")]; + tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_125_cast_fp16 = mul(x = var_2788_cast_fp16, y = const_209_to_fp16)[name = string("k_125_cast_fp16")]; + tensor concat_278x = const()[name = string("concat_278x"), val = tensor([1, -1, 20, 64])]; + tensor var_2795_cast_fp16 = reshape(shape = concat_278x, x = var_2771_cast_fp16)[name = string("op_2795_cast_fp16")]; + tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1, 3])]; + bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)]; + bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)]; + tensor transpose_305_perm_0 = const()[name = string("transpose_305_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_306_perm_0 = const()[name = string("transpose_306_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_306 = transpose(perm = transpose_306_perm_0, x = k_125_cast_fp16)[name = string("transpose_542")]; + tensor transpose_305 = transpose(perm = transpose_305_perm_0, x = q_99_cast_fp16)[name = string("transpose_543")]; + tensor qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_305, y = transpose_306)[name = string("qk_73_cast_fp16")]; + int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)]; + int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; + bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")]; + tensor var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor([0, 0])]; + tensor var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor([false, true])]; + tensor var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = concat_279, end_mask = var_2799_end_mask_0, x = mask_to_fp16)[name = string("op_2799_cast_fp16")]; + int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)]; + int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; + bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; + tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")]; + tensor var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor([0, 0])]; + tensor var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor([true, false])]; + tensor var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = concat_280, end_mask = var_2800_end_mask_0, x = var_2799_cast_fp16)[name = string("op_2800_cast_fp16")]; + tensor qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2800_cast_fp16)[name = string("qk_75_cast_fp16")]; + tensor var_2803_cast_fp16 = softmax(axis = var_2712, x = qk_75_cast_fp16)[name = string("op_2803_cast_fp16")]; + bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)]; + bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)]; + tensor v_125_cast_fp16 = transpose(perm = var_2796, x = var_2795_cast_fp16)[name = string("transpose_544")]; + tensor var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2803_cast_fp16, y = v_125_cast_fp16)[name = string("op_2805_cast_fp16")]; + tensor var_2806 = const()[name = string("op_2806"), val = tensor([0, 2, 1, 3])]; + tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, -1, 1280])]; + tensor var_2807_cast_fp16 = transpose(perm = var_2806, x = var_2805_cast_fp16)[name = string("transpose_541")]; + tensor x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2807_cast_fp16)[name = string("x_223_cast_fp16")]; + tensor var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699016768)))]; + tensor var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702293632)))]; + tensor linear_99_cast_fp16 = linear(bias = var_2812_to_fp16, weight = var_2811_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")]; + tensor var_2819_axes_0 = const()[name = string("op_2819_axes_0"), val = tensor([-1])]; + tensor blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702296256)))]; + tensor blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702298880)))]; + tensor var_2819_cast_fp16 = layer_norm(axes = var_2819_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2819_cast_fp16")]; + tensor var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702301504)))]; + tensor var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705578368)))]; + tensor linear_100_cast_fp16 = linear(bias = var_2829_to_fp16, weight = var_2828_to_fp16, x = var_2819_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([0, 0, 0])]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([0, 1500, 0])]; + tensor k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_284 = const()[name = string("concat_284"), val = tensor([0, 0, 0])]; + tensor concat_285 = const()[name = string("concat_285"), val = tensor([0, 1500, 0])]; + tensor v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 20, 64])]; + tensor var_2849_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2849_cast_fp16")]; + tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_103_cast_fp16 = mul(x = var_2849_cast_fp16, y = const_210_to_fp16)[name = string("q_103_cast_fp16")]; + tensor var_2855 = const()[name = string("op_2855"), val = tensor([1, 1500, 20, -1])]; + tensor var_2856_cast_fp16 = reshape(shape = var_2855, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2856_cast_fp16")]; + tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_129_cast_fp16 = mul(x = var_2856_cast_fp16, y = const_211_to_fp16)[name = string("k_129_cast_fp16")]; + tensor var_2862 = const()[name = string("op_2862"), val = tensor([1, 1500, 20, -1])]; + tensor var_2863_cast_fp16 = reshape(shape = var_2862, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2863_cast_fp16")]; + tensor var_2864 = const()[name = string("op_2864"), val = tensor([0, 2, 1, 3])]; + bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)]; + bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)]; + tensor transpose_307_perm_0 = const()[name = string("transpose_307_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_308_perm_0 = const()[name = string("transpose_308_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_308 = transpose(perm = transpose_308_perm_0, x = k_129_cast_fp16)[name = string("transpose_538")]; + tensor transpose_307 = transpose(perm = transpose_307_perm_0, x = q_103_cast_fp16)[name = string("transpose_539")]; + tensor qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_307, y = transpose_308)[name = string("qk_77_cast_fp16")]; + tensor var_2868_cast_fp16 = softmax(axis = var_2712, x = qk_77_cast_fp16)[name = string("op_2868_cast_fp16")]; + bool var_2870_transpose_x_0 = const()[name = string("op_2870_transpose_x_0"), val = bool(false)]; + bool var_2870_transpose_y_0 = const()[name = string("op_2870_transpose_y_0"), val = bool(false)]; + tensor v_129_cast_fp16 = transpose(perm = var_2864, x = var_2863_cast_fp16)[name = string("transpose_540")]; + tensor var_2870_cast_fp16 = matmul(transpose_x = var_2870_transpose_x_0, transpose_y = var_2870_transpose_y_0, x = var_2868_cast_fp16, y = v_129_cast_fp16)[name = string("op_2870_cast_fp16")]; + tensor var_2871 = const()[name = string("op_2871"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 1280])]; + tensor var_2872_cast_fp16 = transpose(perm = var_2871, x = var_2870_cast_fp16)[name = string("transpose_537")]; + tensor x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2872_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705580992)))]; + tensor var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708857856)))]; + tensor linear_101_cast_fp16 = linear(bias = var_2877_to_fp16, weight = var_2876_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")]; + tensor var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708860480)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708863104)))]; + tensor var_2884_cast_fp16 = layer_norm(axes = var_2884_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2884_cast_fp16")]; + tensor var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708865728)))]; + tensor var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721972992)))]; + tensor linear_102_cast_fp16 = linear(bias = var_2894_to_fp16, weight = var_2893_to_fp16, x = var_2884_cast_fp16)[name = string("linear_102_cast_fp16")]; + string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")]; + tensor x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721983296)))]; + tensor var_2900_to_fp16 = const()[name = string("op_2900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735090560)))]; + tensor linear_103_cast_fp16 = linear(bias = var_2900_to_fp16, weight = var_2899_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; + tensor k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_53_cast_fp16")]; + tensor v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; + tensor v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_53_cast_fp16")]; + tensor k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; + tensor k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")]; + tensor v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; + tensor v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")]; + int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)]; + tensor var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735093184)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735095808)))]; + fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2941_cast_fp16 = layer_norm(axes = var_2941_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2941_cast_fp16")]; + tensor var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735098432)))]; + tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738375296)))]; + tensor linear_104_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = var_2941_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738377920)))]; + tensor linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2956_to_fp16, x = var_2941_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor var_2960_to_fp16 = const()[name = string("op_2960_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741654784)))]; + tensor var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744931648)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2961_to_fp16, weight = var_2960_to_fp16, x = var_2941_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor var_2963_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2963_shape_cast_fp16")]; + int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)]; + int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)]; + bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)]; + string var_2963_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2963_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)]; + tensor var_2963_shape_cast_fp16_to_uint16 = cast(dtype = var_2963_shape_cast_fp16_to_uint16_dtype_0, x = var_2963_shape_cast_fp16)[name = string("cast_364")]; + uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2963_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")]; + string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_363")]; + int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")]; + tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([0])]; + tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([0])]; + tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; + tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([13])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")]; + tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([0])]; + tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; + tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; + int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; + bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; + tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")]; + tensor k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")]; + tensor v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")]; + int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)]; + int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1280)]; + int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)]; + bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)]; + tensor concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")]; + tensor var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_296, end_mask = var_2979_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2979_cast_fp16")]; + tensor var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = concat_296, end_mask = var_2982_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2982_cast_fp16")]; + tensor concat_298x = const()[name = string("concat_298x"), val = tensor([1, -1, 20, 64])]; + tensor var_2992_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2992_cast_fp16")]; + tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_107_cast_fp16 = mul(x = var_2992_cast_fp16, y = const_212_to_fp16)[name = string("q_107_cast_fp16")]; + tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, -1, 20, 64])]; + tensor var_2999_cast_fp16 = reshape(shape = concat_299x, x = var_2979_cast_fp16)[name = string("op_2999_cast_fp16")]; + tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_135_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_213_to_fp16)[name = string("k_135_cast_fp16")]; + tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 20, 64])]; + tensor var_3006_cast_fp16 = reshape(shape = concat_300x, x = var_2982_cast_fp16)[name = string("op_3006_cast_fp16")]; + tensor var_3007 = const()[name = string("op_3007"), val = tensor([0, 2, 1, 3])]; + bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)]; + bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)]; + tensor transpose_309_perm_0 = const()[name = string("transpose_309_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_310_perm_0 = const()[name = string("transpose_310_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_310 = transpose(perm = transpose_310_perm_0, x = k_135_cast_fp16)[name = string("transpose_534")]; + tensor transpose_309 = transpose(perm = transpose_309_perm_0, x = q_107_cast_fp16)[name = string("transpose_535")]; + tensor qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_309, y = transpose_310)[name = string("qk_79_cast_fp16")]; + int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)]; + int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; + bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; + tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")]; + tensor var_3010_begin_0 = const()[name = string("op_3010_begin_0"), val = tensor([0, 0])]; + tensor var_3010_end_mask_0 = const()[name = string("op_3010_end_mask_0"), val = tensor([false, true])]; + tensor var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = concat_301, end_mask = var_3010_end_mask_0, x = mask_to_fp16)[name = string("op_3010_cast_fp16")]; + int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)]; + int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; + bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; + tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")]; + tensor var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor([0, 0])]; + tensor var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor([true, false])]; + tensor var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = concat_302, end_mask = var_3011_end_mask_0, x = var_3010_cast_fp16)[name = string("op_3011_cast_fp16")]; + tensor qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_3011_cast_fp16)[name = string("qk_81_cast_fp16")]; + tensor var_3014_cast_fp16 = softmax(axis = var_2923, x = qk_81_cast_fp16)[name = string("op_3014_cast_fp16")]; + bool var_3016_transpose_x_0 = const()[name = string("op_3016_transpose_x_0"), val = bool(false)]; + bool var_3016_transpose_y_0 = const()[name = string("op_3016_transpose_y_0"), val = bool(false)]; + tensor v_135_cast_fp16 = transpose(perm = var_3007, x = var_3006_cast_fp16)[name = string("transpose_536")]; + tensor var_3016_cast_fp16 = matmul(transpose_x = var_3016_transpose_x_0, transpose_y = var_3016_transpose_y_0, x = var_3014_cast_fp16, y = v_135_cast_fp16)[name = string("op_3016_cast_fp16")]; + tensor var_3017 = const()[name = string("op_3017"), val = tensor([0, 2, 1, 3])]; + tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 1280])]; + tensor var_3018_cast_fp16 = transpose(perm = var_3017, x = var_3016_cast_fp16)[name = string("transpose_533")]; + tensor x_241_cast_fp16 = reshape(shape = concat_303x, x = var_3018_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744934272)))]; + tensor var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748211136)))]; + tensor linear_107_cast_fp16 = linear(bias = var_3023_to_fp16, weight = var_3022_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor([-1])]; + tensor blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748213760)))]; + tensor blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748216384)))]; + tensor var_3030_cast_fp16 = layer_norm(axes = var_3030_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_3030_cast_fp16")]; + tensor var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748219008)))]; + tensor var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751495872)))]; + tensor linear_108_cast_fp16 = linear(bias = var_3040_to_fp16, weight = var_3039_to_fp16, x = var_3030_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor concat_304 = const()[name = string("concat_304"), val = tensor([0, 0, 0])]; + tensor concat_305 = const()[name = string("concat_305"), val = tensor([0, 1500, 0])]; + tensor k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_306 = const()[name = string("concat_306"), val = tensor([0, 0, 0])]; + tensor concat_307 = const()[name = string("concat_307"), val = tensor([0, 1500, 0])]; + tensor v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_308x = const()[name = string("concat_308x"), val = tensor([1, -1, 20, 64])]; + tensor var_3060_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3060_cast_fp16")]; + tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_111_cast_fp16 = mul(x = var_3060_cast_fp16, y = const_214_to_fp16)[name = string("q_111_cast_fp16")]; + tensor var_3066 = const()[name = string("op_3066"), val = tensor([1, 1500, 20, -1])]; + tensor var_3067_cast_fp16 = reshape(shape = var_3066, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3067_cast_fp16")]; + tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_139_cast_fp16 = mul(x = var_3067_cast_fp16, y = const_215_to_fp16)[name = string("k_139_cast_fp16")]; + tensor var_3073 = const()[name = string("op_3073"), val = tensor([1, 1500, 20, -1])]; + tensor var_3074_cast_fp16 = reshape(shape = var_3073, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3074_cast_fp16")]; + tensor var_3075 = const()[name = string("op_3075"), val = tensor([0, 2, 1, 3])]; + bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)]; + bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)]; + tensor transpose_311_perm_0 = const()[name = string("transpose_311_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_312_perm_0 = const()[name = string("transpose_312_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_312 = transpose(perm = transpose_312_perm_0, x = k_139_cast_fp16)[name = string("transpose_530")]; + tensor transpose_311 = transpose(perm = transpose_311_perm_0, x = q_111_cast_fp16)[name = string("transpose_531")]; + tensor qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_311, y = transpose_312)[name = string("qk_83_cast_fp16")]; + tensor var_3079_cast_fp16 = softmax(axis = var_2923, x = qk_83_cast_fp16)[name = string("op_3079_cast_fp16")]; + bool var_3081_transpose_x_0 = const()[name = string("op_3081_transpose_x_0"), val = bool(false)]; + bool var_3081_transpose_y_0 = const()[name = string("op_3081_transpose_y_0"), val = bool(false)]; + tensor v_139_cast_fp16 = transpose(perm = var_3075, x = var_3074_cast_fp16)[name = string("transpose_532")]; + tensor var_3081_cast_fp16 = matmul(transpose_x = var_3081_transpose_x_0, transpose_y = var_3081_transpose_y_0, x = var_3079_cast_fp16, y = v_139_cast_fp16)[name = string("op_3081_cast_fp16")]; + tensor var_3082 = const()[name = string("op_3082"), val = tensor([0, 2, 1, 3])]; + tensor concat_309x = const()[name = string("concat_309x"), val = tensor([1, -1, 1280])]; + tensor var_3083_cast_fp16 = transpose(perm = var_3082, x = var_3081_cast_fp16)[name = string("transpose_529")]; + tensor x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3083_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751498496)))]; + tensor var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754775360)))]; + tensor linear_109_cast_fp16 = linear(bias = var_3088_to_fp16, weight = var_3087_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")]; + tensor var_3095_axes_0 = const()[name = string("op_3095_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754777984)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754780608)))]; + tensor var_3095_cast_fp16 = layer_norm(axes = var_3095_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3095_cast_fp16")]; + tensor var_3104_to_fp16 = const()[name = string("op_3104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754783232)))]; + tensor var_3105_to_fp16 = const()[name = string("op_3105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767890496)))]; + tensor linear_110_cast_fp16 = linear(bias = var_3105_to_fp16, weight = var_3104_to_fp16, x = var_3095_cast_fp16)[name = string("linear_110_cast_fp16")]; + string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")]; + tensor x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_3110_to_fp16 = const()[name = string("op_3110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767900800)))]; + tensor var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781008064)))]; + tensor linear_111_cast_fp16 = linear(bias = var_3111_to_fp16, weight = var_3110_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")]; + tensor k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; + tensor k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_57_cast_fp16")]; + tensor v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; + tensor v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_57_cast_fp16")]; + tensor k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; + tensor k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")]; + tensor v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; + tensor v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")]; + int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)]; + tensor var_3152_axes_0 = const()[name = string("op_3152_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781010688)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781013312)))]; + fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3152_cast_fp16 = layer_norm(axes = var_3152_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3152_cast_fp16")]; + tensor var_3163_to_fp16 = const()[name = string("op_3163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781015936)))]; + tensor var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784292800)))]; + tensor linear_112_cast_fp16 = linear(bias = var_3164_to_fp16, weight = var_3163_to_fp16, x = var_3152_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor var_3167_to_fp16 = const()[name = string("op_3167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784295424)))]; + tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3167_to_fp16, x = var_3152_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787572288)))]; + tensor var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790849152)))]; + tensor linear_114_cast_fp16 = linear(bias = var_3172_to_fp16, weight = var_3171_to_fp16, x = var_3152_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_3174_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3174_shape_cast_fp16")]; + int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)]; + int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)]; + bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)]; + string var_3174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)]; + tensor var_3174_shape_cast_fp16_to_uint16 = cast(dtype = var_3174_shape_cast_fp16_to_uint16_dtype_0, x = var_3174_shape_cast_fp16)[name = string("cast_362")]; + uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3174_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")]; + string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_361")]; + int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")]; + tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([0])]; + tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; + tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")]; + tensor concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor([14])]; + int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; + bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; + tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")]; + tensor concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor([0])]; + tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; + tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; + int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; + bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; + tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")]; + tensor k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")]; + tensor v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")]; + int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)]; + int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1280)]; + int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; + bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; + tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")]; + tensor var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_318, end_mask = var_3190_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3190_cast_fp16")]; + tensor var_3193_begin_0 = const()[name = string("op_3193_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3193_end_mask_0 = const()[name = string("op_3193_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3193_cast_fp16 = slice_by_index(begin = var_3193_begin_0, end = concat_318, end_mask = var_3193_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3193_cast_fp16")]; + tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, -1, 20, 64])]; + tensor var_3203_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3203_cast_fp16")]; + tensor const_216_to_fp16 = const()[name = string("const_216_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_115_cast_fp16 = mul(x = var_3203_cast_fp16, y = const_216_to_fp16)[name = string("q_115_cast_fp16")]; + tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, -1, 20, 64])]; + tensor var_3210_cast_fp16 = reshape(shape = concat_321x, x = var_3190_cast_fp16)[name = string("op_3210_cast_fp16")]; + tensor const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_145_cast_fp16 = mul(x = var_3210_cast_fp16, y = const_217_to_fp16)[name = string("k_145_cast_fp16")]; + tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 20, 64])]; + tensor var_3217_cast_fp16 = reshape(shape = concat_322x, x = var_3193_cast_fp16)[name = string("op_3217_cast_fp16")]; + tensor var_3218 = const()[name = string("op_3218"), val = tensor([0, 2, 1, 3])]; + bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)]; + bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)]; + tensor transpose_313_perm_0 = const()[name = string("transpose_313_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_314_perm_0 = const()[name = string("transpose_314_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_314 = transpose(perm = transpose_314_perm_0, x = k_145_cast_fp16)[name = string("transpose_526")]; + tensor transpose_313 = transpose(perm = transpose_313_perm_0, x = q_115_cast_fp16)[name = string("transpose_527")]; + tensor qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_313, y = transpose_314)[name = string("qk_85_cast_fp16")]; + int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)]; + int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; + bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; + tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")]; + tensor var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor([0, 0])]; + tensor var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor([false, true])]; + tensor var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = concat_323, end_mask = var_3221_end_mask_0, x = mask_to_fp16)[name = string("op_3221_cast_fp16")]; + int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)]; + int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)]; + bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)]; + tensor concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")]; + tensor var_3222_begin_0 = const()[name = string("op_3222_begin_0"), val = tensor([0, 0])]; + tensor var_3222_end_mask_0 = const()[name = string("op_3222_end_mask_0"), val = tensor([true, false])]; + tensor var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = concat_324, end_mask = var_3222_end_mask_0, x = var_3221_cast_fp16)[name = string("op_3222_cast_fp16")]; + tensor qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3222_cast_fp16)[name = string("qk_87_cast_fp16")]; + tensor var_3225_cast_fp16 = softmax(axis = var_3134, x = qk_87_cast_fp16)[name = string("op_3225_cast_fp16")]; + bool var_3227_transpose_x_0 = const()[name = string("op_3227_transpose_x_0"), val = bool(false)]; + bool var_3227_transpose_y_0 = const()[name = string("op_3227_transpose_y_0"), val = bool(false)]; + tensor v_145_cast_fp16 = transpose(perm = var_3218, x = var_3217_cast_fp16)[name = string("transpose_528")]; + tensor var_3227_cast_fp16 = matmul(transpose_x = var_3227_transpose_x_0, transpose_y = var_3227_transpose_y_0, x = var_3225_cast_fp16, y = v_145_cast_fp16)[name = string("op_3227_cast_fp16")]; + tensor var_3228 = const()[name = string("op_3228"), val = tensor([0, 2, 1, 3])]; + tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 1280])]; + tensor var_3229_cast_fp16 = transpose(perm = var_3228, x = var_3227_cast_fp16)[name = string("transpose_525")]; + tensor x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3229_cast_fp16)[name = string("x_259_cast_fp16")]; + tensor var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790851776)))]; + tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794128640)))]; + tensor linear_115_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_3241_axes_0 = const()[name = string("op_3241_axes_0"), val = tensor([-1])]; + tensor blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794131264)))]; + tensor blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794133888)))]; + tensor var_3241_cast_fp16 = layer_norm(axes = var_3241_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3241_cast_fp16")]; + tensor var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794136512)))]; + tensor var_3251_to_fp16 = const()[name = string("op_3251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797413376)))]; + tensor linear_116_cast_fp16 = linear(bias = var_3251_to_fp16, weight = var_3250_to_fp16, x = var_3241_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor concat_326 = const()[name = string("concat_326"), val = tensor([0, 0, 0])]; + tensor concat_327 = const()[name = string("concat_327"), val = tensor([0, 1500, 0])]; + tensor k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_328 = const()[name = string("concat_328"), val = tensor([0, 0, 0])]; + tensor concat_329 = const()[name = string("concat_329"), val = tensor([0, 1500, 0])]; + tensor v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 20, 64])]; + tensor var_3271_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3271_cast_fp16")]; + tensor const_218_to_fp16 = const()[name = string("const_218_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_119_cast_fp16 = mul(x = var_3271_cast_fp16, y = const_218_to_fp16)[name = string("q_119_cast_fp16")]; + tensor var_3277 = const()[name = string("op_3277"), val = tensor([1, 1500, 20, -1])]; + tensor var_3278_cast_fp16 = reshape(shape = var_3277, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_149_cast_fp16 = mul(x = var_3278_cast_fp16, y = const_219_to_fp16)[name = string("k_149_cast_fp16")]; + tensor var_3284 = const()[name = string("op_3284"), val = tensor([1, 1500, 20, -1])]; + tensor var_3285_cast_fp16 = reshape(shape = var_3284, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3285_cast_fp16")]; + tensor var_3286 = const()[name = string("op_3286"), val = tensor([0, 2, 1, 3])]; + bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)]; + bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)]; + tensor transpose_315_perm_0 = const()[name = string("transpose_315_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_316_perm_0 = const()[name = string("transpose_316_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_316 = transpose(perm = transpose_316_perm_0, x = k_149_cast_fp16)[name = string("transpose_522")]; + tensor transpose_315 = transpose(perm = transpose_315_perm_0, x = q_119_cast_fp16)[name = string("transpose_523")]; + tensor qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_315, y = transpose_316)[name = string("qk_89_cast_fp16")]; + tensor var_3290_cast_fp16 = softmax(axis = var_3134, x = qk_89_cast_fp16)[name = string("op_3290_cast_fp16")]; + bool var_3292_transpose_x_0 = const()[name = string("op_3292_transpose_x_0"), val = bool(false)]; + bool var_3292_transpose_y_0 = const()[name = string("op_3292_transpose_y_0"), val = bool(false)]; + tensor v_149_cast_fp16 = transpose(perm = var_3286, x = var_3285_cast_fp16)[name = string("transpose_524")]; + tensor var_3292_cast_fp16 = matmul(transpose_x = var_3292_transpose_x_0, transpose_y = var_3292_transpose_y_0, x = var_3290_cast_fp16, y = v_149_cast_fp16)[name = string("op_3292_cast_fp16")]; + tensor var_3293 = const()[name = string("op_3293"), val = tensor([0, 2, 1, 3])]; + tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 1280])]; + tensor var_3294_cast_fp16 = transpose(perm = var_3293, x = var_3292_cast_fp16)[name = string("transpose_521")]; + tensor x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3294_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797416000)))]; + tensor var_3299_to_fp16 = const()[name = string("op_3299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800692864)))]; + tensor linear_117_cast_fp16 = linear(bias = var_3299_to_fp16, weight = var_3298_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")]; + tensor var_3306_axes_0 = const()[name = string("op_3306_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800695488)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800698112)))]; + tensor var_3306_cast_fp16 = layer_norm(axes = var_3306_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3306_cast_fp16")]; + tensor var_3315_to_fp16 = const()[name = string("op_3315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800700736)))]; + tensor var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813808000)))]; + tensor linear_118_cast_fp16 = linear(bias = var_3316_to_fp16, weight = var_3315_to_fp16, x = var_3306_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")]; + tensor x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")]; + tensor var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813818304)))]; + tensor var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826925568)))]; + tensor linear_119_cast_fp16 = linear(bias = var_3322_to_fp16, weight = var_3321_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")]; + tensor k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; + tensor k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_61_cast_fp16")]; + tensor v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; + tensor v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_61_cast_fp16")]; + tensor k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; + tensor k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")]; + tensor v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; + tensor v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")]; + int32 var_3345 = const()[name = string("op_3345"), val = int32(-1)]; + tensor var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826928192)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826930816)))]; + fp16 var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3363_cast_fp16 = layer_norm(axes = var_3363_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3363_cast_fp16")]; + tensor var_3374_to_fp16 = const()[name = string("op_3374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826933440)))]; + tensor var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830210304)))]; + tensor linear_120_cast_fp16 = linear(bias = var_3375_to_fp16, weight = var_3374_to_fp16, x = var_3363_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830212928)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3378_to_fp16, x = var_3363_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_3382_to_fp16 = const()[name = string("op_3382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833489792)))]; + tensor var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836766656)))]; + tensor linear_122_cast_fp16 = linear(bias = var_3383_to_fp16, weight = var_3382_to_fp16, x = var_3363_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_3385_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3385_shape_cast_fp16")]; + int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; + int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; + bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; + string var_3385_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3385_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)]; + tensor var_3385_shape_cast_fp16_to_uint16 = cast(dtype = var_3385_shape_cast_fp16_to_uint16_dtype_0, x = var_3385_shape_cast_fp16)[name = string("cast_360")]; + uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3385_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; + string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_359")]; + int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([0])]; + tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; + tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")]; + tensor concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor([15])]; + int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)]; + bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)]; + tensor concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")]; + tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([0])]; + tensor concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor([0])]; + tensor concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor([0])]; + int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; + bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; + tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")]; + tensor k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = k_cache1)[name = string("coreml_update_state_94")]; + tensor v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = v_cache1)[name = string("coreml_update_state_95")]; + int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; + int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1280)]; + int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; + bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; + tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")]; + tensor var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_340, end_mask = var_3401_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3401_cast_fp16")]; + tensor var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = concat_340, end_mask = var_3404_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3404_cast_fp16")]; + tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 20, 64])]; + tensor var_3414_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3414_cast_fp16")]; + tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_123_cast_fp16 = mul(x = var_3414_cast_fp16, y = const_220_to_fp16)[name = string("q_123_cast_fp16")]; + tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 20, 64])]; + tensor var_3421_cast_fp16 = reshape(shape = concat_343x, x = var_3401_cast_fp16)[name = string("op_3421_cast_fp16")]; + tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_155_cast_fp16 = mul(x = var_3421_cast_fp16, y = const_221_to_fp16)[name = string("k_155_cast_fp16")]; + tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 20, 64])]; + tensor var_3428_cast_fp16 = reshape(shape = concat_344x, x = var_3404_cast_fp16)[name = string("op_3428_cast_fp16")]; + tensor var_3429 = const()[name = string("op_3429"), val = tensor([0, 2, 1, 3])]; + bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)]; + bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)]; + tensor transpose_317_perm_0 = const()[name = string("transpose_317_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_318_perm_0 = const()[name = string("transpose_318_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_318 = transpose(perm = transpose_318_perm_0, x = k_155_cast_fp16)[name = string("transpose_518")]; + tensor transpose_317 = transpose(perm = transpose_317_perm_0, x = q_123_cast_fp16)[name = string("transpose_519")]; + tensor qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_317, y = transpose_318)[name = string("qk_91_cast_fp16")]; + int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)]; + int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; + bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; + tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")]; + tensor var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor([0, 0])]; + tensor var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor([false, true])]; + tensor var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = concat_345, end_mask = var_3432_end_mask_0, x = mask_to_fp16)[name = string("op_3432_cast_fp16")]; + int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)]; + int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)]; + bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)]; + tensor concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")]; + tensor var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor([0, 0])]; + tensor var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor([true, false])]; + tensor var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = concat_346, end_mask = var_3433_end_mask_0, x = var_3432_cast_fp16)[name = string("op_3433_cast_fp16")]; + tensor qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3433_cast_fp16)[name = string("qk_93_cast_fp16")]; + tensor var_3436_cast_fp16 = softmax(axis = var_3345, x = qk_93_cast_fp16)[name = string("op_3436_cast_fp16")]; + bool var_3438_transpose_x_0 = const()[name = string("op_3438_transpose_x_0"), val = bool(false)]; + bool var_3438_transpose_y_0 = const()[name = string("op_3438_transpose_y_0"), val = bool(false)]; + tensor v_155_cast_fp16 = transpose(perm = var_3429, x = var_3428_cast_fp16)[name = string("transpose_520")]; + tensor var_3438_cast_fp16 = matmul(transpose_x = var_3438_transpose_x_0, transpose_y = var_3438_transpose_y_0, x = var_3436_cast_fp16, y = v_155_cast_fp16)[name = string("op_3438_cast_fp16")]; + tensor var_3439 = const()[name = string("op_3439"), val = tensor([0, 2, 1, 3])]; + tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 1280])]; + tensor var_3440_cast_fp16 = transpose(perm = var_3439, x = var_3438_cast_fp16)[name = string("transpose_517")]; + tensor x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3440_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836769280)))]; + tensor var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840046144)))]; + tensor linear_123_cast_fp16 = linear(bias = var_3445_to_fp16, weight = var_3444_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")]; + tensor var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor([-1])]; + tensor blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840048768)))]; + tensor blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840051392)))]; + tensor var_3452_cast_fp16 = layer_norm(axes = var_3452_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3452_cast_fp16")]; + tensor var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840054016)))]; + tensor var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843330880)))]; + tensor linear_124_cast_fp16 = linear(bias = var_3462_to_fp16, weight = var_3461_to_fp16, x = var_3452_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor concat_348 = const()[name = string("concat_348"), val = tensor([0, 0, 0])]; + tensor concat_349 = const()[name = string("concat_349"), val = tensor([0, 1500, 0])]; + tensor k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_350 = const()[name = string("concat_350"), val = tensor([0, 0, 0])]; + tensor concat_351 = const()[name = string("concat_351"), val = tensor([0, 1500, 0])]; + tensor v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_352x = const()[name = string("concat_352x"), val = tensor([1, -1, 20, 64])]; + tensor var_3482_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3482_cast_fp16")]; + tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_127_cast_fp16 = mul(x = var_3482_cast_fp16, y = const_222_to_fp16)[name = string("q_127_cast_fp16")]; + tensor var_3488 = const()[name = string("op_3488"), val = tensor([1, 1500, 20, -1])]; + tensor var_3489_cast_fp16 = reshape(shape = var_3488, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3489_cast_fp16")]; + tensor const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_159_cast_fp16 = mul(x = var_3489_cast_fp16, y = const_223_to_fp16)[name = string("k_159_cast_fp16")]; + tensor var_3495 = const()[name = string("op_3495"), val = tensor([1, 1500, 20, -1])]; + tensor var_3496_cast_fp16 = reshape(shape = var_3495, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3496_cast_fp16")]; + tensor var_3497 = const()[name = string("op_3497"), val = tensor([0, 2, 1, 3])]; + bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)]; + bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)]; + tensor transpose_319_perm_0 = const()[name = string("transpose_319_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_320_perm_0 = const()[name = string("transpose_320_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_320 = transpose(perm = transpose_320_perm_0, x = k_159_cast_fp16)[name = string("transpose_514")]; + tensor transpose_319 = transpose(perm = transpose_319_perm_0, x = q_127_cast_fp16)[name = string("transpose_515")]; + tensor qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_319, y = transpose_320)[name = string("qk_95_cast_fp16")]; + tensor var_3501_cast_fp16 = softmax(axis = var_3345, x = qk_95_cast_fp16)[name = string("op_3501_cast_fp16")]; + bool var_3503_transpose_x_0 = const()[name = string("op_3503_transpose_x_0"), val = bool(false)]; + bool var_3503_transpose_y_0 = const()[name = string("op_3503_transpose_y_0"), val = bool(false)]; + tensor v_159_cast_fp16 = transpose(perm = var_3497, x = var_3496_cast_fp16)[name = string("transpose_516")]; + tensor var_3503_cast_fp16 = matmul(transpose_x = var_3503_transpose_x_0, transpose_y = var_3503_transpose_y_0, x = var_3501_cast_fp16, y = v_159_cast_fp16)[name = string("op_3503_cast_fp16")]; + tensor var_3504 = const()[name = string("op_3504"), val = tensor([0, 2, 1, 3])]; + tensor concat_353x = const()[name = string("concat_353x"), val = tensor([1, -1, 1280])]; + tensor var_3505_cast_fp16 = transpose(perm = var_3504, x = var_3503_cast_fp16)[name = string("transpose_513")]; + tensor x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3505_cast_fp16)[name = string("x_283_cast_fp16")]; + tensor var_3509_to_fp16 = const()[name = string("op_3509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843333504)))]; + tensor var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846610368)))]; + tensor linear_125_cast_fp16 = linear(bias = var_3510_to_fp16, weight = var_3509_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")]; + tensor var_3517_axes_0 = const()[name = string("op_3517_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846612992)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846615616)))]; + tensor var_3517_cast_fp16 = layer_norm(axes = var_3517_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3517_cast_fp16")]; + tensor var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846618240)))]; + tensor var_3527_to_fp16 = const()[name = string("op_3527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859725504)))]; + tensor linear_126_cast_fp16 = linear(bias = var_3527_to_fp16, weight = var_3526_to_fp16, x = var_3517_cast_fp16)[name = string("linear_126_cast_fp16")]; + string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")]; + tensor x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859735808)))]; + tensor var_3533_to_fp16 = const()[name = string("op_3533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872843072)))]; + tensor linear_127_cast_fp16 = linear(bias = var_3533_to_fp16, weight = var_3532_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")]; + tensor k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; + tensor k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_94)[name = string("k_cache_65_cast_fp16")]; + tensor v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; + tensor v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_95)[name = string("v_cache_65_cast_fp16")]; + tensor k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; + tensor k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")]; + tensor v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; + tensor v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")]; + int32 var_3556 = const()[name = string("op_3556"), val = int32(-1)]; + tensor var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872845696)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872848320)))]; + fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3574_cast_fp16 = layer_norm(axes = var_3574_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3574_cast_fp16")]; + tensor var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872850944)))]; + tensor var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876127808)))]; + tensor linear_128_cast_fp16 = linear(bias = var_3586_to_fp16, weight = var_3585_to_fp16, x = var_3574_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876130432)))]; + tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3589_to_fp16, x = var_3574_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor var_3593_to_fp16 = const()[name = string("op_3593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(879407296)))]; + tensor var_3594_to_fp16 = const()[name = string("op_3594_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882684160)))]; + tensor linear_130_cast_fp16 = linear(bias = var_3594_to_fp16, weight = var_3593_to_fp16, x = var_3574_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_3596_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3596_shape_cast_fp16")]; + int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)]; + int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)]; + bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)]; + string var_3596_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3596_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)]; + tensor var_3596_shape_cast_fp16_to_uint16 = cast(dtype = var_3596_shape_cast_fp16_to_uint16_dtype_0, x = var_3596_shape_cast_fp16)[name = string("cast_358")]; + uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3596_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")]; + string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_357")]; + int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([0])]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([0])]; + tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; + tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")]; + tensor concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor([16])]; + int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; + bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; + tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")]; + tensor concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor([0])]; + tensor concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor([0])]; + tensor concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor([0])]; + int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; + bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; + tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")]; + tensor k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_94)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = k_cache1)[name = string("coreml_update_state_96")]; + tensor v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_95)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = v_cache1)[name = string("coreml_update_state_97")]; + int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)]; + int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1280)]; + int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; + bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; + tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")]; + tensor var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_362, end_mask = var_3612_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3612_cast_fp16")]; + tensor var_3615_begin_0 = const()[name = string("op_3615_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3615_end_mask_0 = const()[name = string("op_3615_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3615_cast_fp16 = slice_by_index(begin = var_3615_begin_0, end = concat_362, end_mask = var_3615_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3615_cast_fp16")]; + tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 20, 64])]; + tensor var_3625_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3625_cast_fp16")]; + tensor const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_131_cast_fp16 = mul(x = var_3625_cast_fp16, y = const_224_to_fp16)[name = string("q_131_cast_fp16")]; + tensor concat_365x = const()[name = string("concat_365x"), val = tensor([1, -1, 20, 64])]; + tensor var_3632_cast_fp16 = reshape(shape = concat_365x, x = var_3612_cast_fp16)[name = string("op_3632_cast_fp16")]; + tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_165_cast_fp16 = mul(x = var_3632_cast_fp16, y = const_225_to_fp16)[name = string("k_165_cast_fp16")]; + tensor concat_366x = const()[name = string("concat_366x"), val = tensor([1, -1, 20, 64])]; + tensor var_3639_cast_fp16 = reshape(shape = concat_366x, x = var_3615_cast_fp16)[name = string("op_3639_cast_fp16")]; + tensor var_3640 = const()[name = string("op_3640"), val = tensor([0, 2, 1, 3])]; + bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)]; + bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)]; + tensor transpose_321_perm_0 = const()[name = string("transpose_321_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_322_perm_0 = const()[name = string("transpose_322_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_322 = transpose(perm = transpose_322_perm_0, x = k_165_cast_fp16)[name = string("transpose_510")]; + tensor transpose_321 = transpose(perm = transpose_321_perm_0, x = q_131_cast_fp16)[name = string("transpose_511")]; + tensor qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_321, y = transpose_322)[name = string("qk_97_cast_fp16")]; + int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)]; + int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; + bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; + tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")]; + tensor var_3643_begin_0 = const()[name = string("op_3643_begin_0"), val = tensor([0, 0])]; + tensor var_3643_end_mask_0 = const()[name = string("op_3643_end_mask_0"), val = tensor([false, true])]; + tensor var_3643_cast_fp16 = slice_by_index(begin = var_3643_begin_0, end = concat_367, end_mask = var_3643_end_mask_0, x = mask_to_fp16)[name = string("op_3643_cast_fp16")]; + int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)]; + int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)]; + bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)]; + tensor concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")]; + tensor var_3644_begin_0 = const()[name = string("op_3644_begin_0"), val = tensor([0, 0])]; + tensor var_3644_end_mask_0 = const()[name = string("op_3644_end_mask_0"), val = tensor([true, false])]; + tensor var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = concat_368, end_mask = var_3644_end_mask_0, x = var_3643_cast_fp16)[name = string("op_3644_cast_fp16")]; + tensor qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3644_cast_fp16)[name = string("qk_99_cast_fp16")]; + tensor var_3647_cast_fp16 = softmax(axis = var_3556, x = qk_99_cast_fp16)[name = string("op_3647_cast_fp16")]; + bool var_3649_transpose_x_0 = const()[name = string("op_3649_transpose_x_0"), val = bool(false)]; + bool var_3649_transpose_y_0 = const()[name = string("op_3649_transpose_y_0"), val = bool(false)]; + tensor v_165_cast_fp16 = transpose(perm = var_3640, x = var_3639_cast_fp16)[name = string("transpose_512")]; + tensor var_3649_cast_fp16 = matmul(transpose_x = var_3649_transpose_x_0, transpose_y = var_3649_transpose_y_0, x = var_3647_cast_fp16, y = v_165_cast_fp16)[name = string("op_3649_cast_fp16")]; + tensor var_3650 = const()[name = string("op_3650"), val = tensor([0, 2, 1, 3])]; + tensor concat_369x = const()[name = string("concat_369x"), val = tensor([1, -1, 1280])]; + tensor var_3651_cast_fp16 = transpose(perm = var_3650, x = var_3649_cast_fp16)[name = string("transpose_509")]; + tensor x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3651_cast_fp16)[name = string("x_295_cast_fp16")]; + tensor var_3655_to_fp16 = const()[name = string("op_3655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882686784)))]; + tensor var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885963648)))]; + tensor linear_131_cast_fp16 = linear(bias = var_3656_to_fp16, weight = var_3655_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")]; + tensor var_3663_axes_0 = const()[name = string("op_3663_axes_0"), val = tensor([-1])]; + tensor blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885966272)))]; + tensor blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885968896)))]; + tensor var_3663_cast_fp16 = layer_norm(axes = var_3663_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3663_cast_fp16")]; + tensor var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885971520)))]; + tensor var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889248384)))]; + tensor linear_132_cast_fp16 = linear(bias = var_3673_to_fp16, weight = var_3672_to_fp16, x = var_3663_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor concat_370 = const()[name = string("concat_370"), val = tensor([0, 0, 0])]; + tensor concat_371 = const()[name = string("concat_371"), val = tensor([0, 1500, 0])]; + tensor k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_372 = const()[name = string("concat_372"), val = tensor([0, 0, 0])]; + tensor concat_373 = const()[name = string("concat_373"), val = tensor([0, 1500, 0])]; + tensor v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_374x = const()[name = string("concat_374x"), val = tensor([1, -1, 20, 64])]; + tensor var_3693_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3693_cast_fp16")]; + tensor const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_135_cast_fp16 = mul(x = var_3693_cast_fp16, y = const_226_to_fp16)[name = string("q_135_cast_fp16")]; + tensor var_3699 = const()[name = string("op_3699"), val = tensor([1, 1500, 20, -1])]; + tensor var_3700_cast_fp16 = reshape(shape = var_3699, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3700_cast_fp16")]; + tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_169_cast_fp16 = mul(x = var_3700_cast_fp16, y = const_227_to_fp16)[name = string("k_169_cast_fp16")]; + tensor var_3706 = const()[name = string("op_3706"), val = tensor([1, 1500, 20, -1])]; + tensor var_3707_cast_fp16 = reshape(shape = var_3706, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3707_cast_fp16")]; + tensor var_3708 = const()[name = string("op_3708"), val = tensor([0, 2, 1, 3])]; + bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)]; + bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)]; + tensor transpose_323_perm_0 = const()[name = string("transpose_323_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_324_perm_0 = const()[name = string("transpose_324_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_324 = transpose(perm = transpose_324_perm_0, x = k_169_cast_fp16)[name = string("transpose_506")]; + tensor transpose_323 = transpose(perm = transpose_323_perm_0, x = q_135_cast_fp16)[name = string("transpose_507")]; + tensor qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_323, y = transpose_324)[name = string("qk_101_cast_fp16")]; + tensor var_3712_cast_fp16 = softmax(axis = var_3556, x = qk_101_cast_fp16)[name = string("op_3712_cast_fp16")]; + bool var_3714_transpose_x_0 = const()[name = string("op_3714_transpose_x_0"), val = bool(false)]; + bool var_3714_transpose_y_0 = const()[name = string("op_3714_transpose_y_0"), val = bool(false)]; + tensor v_169_cast_fp16 = transpose(perm = var_3708, x = var_3707_cast_fp16)[name = string("transpose_508")]; + tensor var_3714_cast_fp16 = matmul(transpose_x = var_3714_transpose_x_0, transpose_y = var_3714_transpose_y_0, x = var_3712_cast_fp16, y = v_169_cast_fp16)[name = string("op_3714_cast_fp16")]; + tensor var_3715 = const()[name = string("op_3715"), val = tensor([0, 2, 1, 3])]; + tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, -1, 1280])]; + tensor var_3716_cast_fp16 = transpose(perm = var_3715, x = var_3714_cast_fp16)[name = string("transpose_505")]; + tensor x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3716_cast_fp16)[name = string("x_301_cast_fp16")]; + tensor var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889251008)))]; + tensor var_3721_to_fp16 = const()[name = string("op_3721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892527872)))]; + tensor linear_133_cast_fp16 = linear(bias = var_3721_to_fp16, weight = var_3720_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")]; + tensor var_3728_axes_0 = const()[name = string("op_3728_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892530496)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892533120)))]; + tensor var_3728_cast_fp16 = layer_norm(axes = var_3728_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3728_cast_fp16")]; + tensor var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892535744)))]; + tensor var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905643008)))]; + tensor linear_134_cast_fp16 = linear(bias = var_3738_to_fp16, weight = var_3737_to_fp16, x = var_3728_cast_fp16)[name = string("linear_134_cast_fp16")]; + string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")]; + tensor x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")]; + tensor var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905653312)))]; + tensor var_3744_to_fp16 = const()[name = string("op_3744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918760576)))]; + tensor linear_135_cast_fp16 = linear(bias = var_3744_to_fp16, weight = var_3743_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; + tensor k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_96)[name = string("k_cache_69_cast_fp16")]; + tensor v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; + tensor v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_97)[name = string("v_cache_69_cast_fp16")]; + tensor k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; + tensor k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")]; + tensor v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; + tensor v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")]; + int32 var_3767 = const()[name = string("op_3767"), val = int32(-1)]; + tensor var_3785_axes_0 = const()[name = string("op_3785_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918763200)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918765824)))]; + fp16 var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3785_cast_fp16 = layer_norm(axes = var_3785_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3785_cast_fp16")]; + tensor var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918768448)))]; + tensor var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922045312)))]; + tensor linear_136_cast_fp16 = linear(bias = var_3797_to_fp16, weight = var_3796_to_fp16, x = var_3785_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922047936)))]; + tensor linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3800_to_fp16, x = var_3785_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3804_to_fp16 = const()[name = string("op_3804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925324800)))]; + tensor var_3805_to_fp16 = const()[name = string("op_3805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928601664)))]; + tensor linear_138_cast_fp16 = linear(bias = var_3805_to_fp16, weight = var_3804_to_fp16, x = var_3785_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_3807_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3807_shape_cast_fp16")]; + int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; + int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; + bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; + string var_3807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)]; + tensor var_3807_shape_cast_fp16_to_uint16 = cast(dtype = var_3807_shape_cast_fp16_to_uint16_dtype_0, x = var_3807_shape_cast_fp16)[name = string("cast_356")]; + uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3807_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; + string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_355")]; + int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")]; + tensor expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([0])]; + tensor expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor([0])]; + tensor expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")]; + tensor concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor([17])]; + int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; + bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; + tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")]; + tensor concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor([0])]; + tensor concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor([0])]; + tensor concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor([0])]; + int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; + bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; + tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")]; + tensor k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_96)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = k_cache1)[name = string("coreml_update_state_98")]; + tensor v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_97)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = v_cache1)[name = string("coreml_update_state_99")]; + int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)]; + int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1280)]; + int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; + bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; + tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")]; + tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_384, end_mask = var_3823_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3823_cast_fp16")]; + tensor var_3826_begin_0 = const()[name = string("op_3826_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3826_end_mask_0 = const()[name = string("op_3826_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = concat_384, end_mask = var_3826_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3826_cast_fp16")]; + tensor concat_386x = const()[name = string("concat_386x"), val = tensor([1, -1, 20, 64])]; + tensor var_3836_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3836_cast_fp16")]; + tensor const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_139_cast_fp16 = mul(x = var_3836_cast_fp16, y = const_228_to_fp16)[name = string("q_139_cast_fp16")]; + tensor concat_387x = const()[name = string("concat_387x"), val = tensor([1, -1, 20, 64])]; + tensor var_3843_cast_fp16 = reshape(shape = concat_387x, x = var_3823_cast_fp16)[name = string("op_3843_cast_fp16")]; + tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_175_cast_fp16 = mul(x = var_3843_cast_fp16, y = const_229_to_fp16)[name = string("k_175_cast_fp16")]; + tensor concat_388x = const()[name = string("concat_388x"), val = tensor([1, -1, 20, 64])]; + tensor var_3850_cast_fp16 = reshape(shape = concat_388x, x = var_3826_cast_fp16)[name = string("op_3850_cast_fp16")]; + tensor var_3851 = const()[name = string("op_3851"), val = tensor([0, 2, 1, 3])]; + bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)]; + bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)]; + tensor transpose_325_perm_0 = const()[name = string("transpose_325_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_326_perm_0 = const()[name = string("transpose_326_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_326 = transpose(perm = transpose_326_perm_0, x = k_175_cast_fp16)[name = string("transpose_502")]; + tensor transpose_325 = transpose(perm = transpose_325_perm_0, x = q_139_cast_fp16)[name = string("transpose_503")]; + tensor qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_325, y = transpose_326)[name = string("qk_103_cast_fp16")]; + int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)]; + int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)]; + bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)]; + tensor concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")]; + tensor var_3854_begin_0 = const()[name = string("op_3854_begin_0"), val = tensor([0, 0])]; + tensor var_3854_end_mask_0 = const()[name = string("op_3854_end_mask_0"), val = tensor([false, true])]; + tensor var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = concat_389, end_mask = var_3854_end_mask_0, x = mask_to_fp16)[name = string("op_3854_cast_fp16")]; + int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)]; + int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)]; + bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)]; + tensor concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")]; + tensor var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor([0, 0])]; + tensor var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor([true, false])]; + tensor var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_390, end_mask = var_3855_end_mask_0, x = var_3854_cast_fp16)[name = string("op_3855_cast_fp16")]; + tensor qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3855_cast_fp16)[name = string("qk_105_cast_fp16")]; + tensor var_3858_cast_fp16 = softmax(axis = var_3767, x = qk_105_cast_fp16)[name = string("op_3858_cast_fp16")]; + bool var_3860_transpose_x_0 = const()[name = string("op_3860_transpose_x_0"), val = bool(false)]; + bool var_3860_transpose_y_0 = const()[name = string("op_3860_transpose_y_0"), val = bool(false)]; + tensor v_175_cast_fp16 = transpose(perm = var_3851, x = var_3850_cast_fp16)[name = string("transpose_504")]; + tensor var_3860_cast_fp16 = matmul(transpose_x = var_3860_transpose_x_0, transpose_y = var_3860_transpose_y_0, x = var_3858_cast_fp16, y = v_175_cast_fp16)[name = string("op_3860_cast_fp16")]; + tensor var_3861 = const()[name = string("op_3861"), val = tensor([0, 2, 1, 3])]; + tensor concat_391x = const()[name = string("concat_391x"), val = tensor([1, -1, 1280])]; + tensor var_3862_cast_fp16 = transpose(perm = var_3861, x = var_3860_cast_fp16)[name = string("transpose_501")]; + tensor x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3862_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_3866_to_fp16 = const()[name = string("op_3866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928604288)))]; + tensor var_3867_to_fp16 = const()[name = string("op_3867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931881152)))]; + tensor linear_139_cast_fp16 = linear(bias = var_3867_to_fp16, weight = var_3866_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")]; + tensor var_3874_axes_0 = const()[name = string("op_3874_axes_0"), val = tensor([-1])]; + tensor blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931883776)))]; + tensor blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931886400)))]; + tensor var_3874_cast_fp16 = layer_norm(axes = var_3874_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3874_cast_fp16")]; + tensor var_3883_to_fp16 = const()[name = string("op_3883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931889024)))]; + tensor var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935165888)))]; + tensor linear_140_cast_fp16 = linear(bias = var_3884_to_fp16, weight = var_3883_to_fp16, x = var_3874_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor concat_392 = const()[name = string("concat_392"), val = tensor([0, 0, 0])]; + tensor concat_393 = const()[name = string("concat_393"), val = tensor([0, 1500, 0])]; + tensor k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_394 = const()[name = string("concat_394"), val = tensor([0, 0, 0])]; + tensor concat_395 = const()[name = string("concat_395"), val = tensor([0, 1500, 0])]; + tensor v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, -1, 20, 64])]; + tensor var_3904_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3904_cast_fp16")]; + tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_143_cast_fp16 = mul(x = var_3904_cast_fp16, y = const_230_to_fp16)[name = string("q_143_cast_fp16")]; + tensor var_3910 = const()[name = string("op_3910"), val = tensor([1, 1500, 20, -1])]; + tensor var_3911_cast_fp16 = reshape(shape = var_3910, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3911_cast_fp16")]; + tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_179_cast_fp16 = mul(x = var_3911_cast_fp16, y = const_231_to_fp16)[name = string("k_179_cast_fp16")]; + tensor var_3917 = const()[name = string("op_3917"), val = tensor([1, 1500, 20, -1])]; + tensor var_3918_cast_fp16 = reshape(shape = var_3917, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3918_cast_fp16")]; + tensor var_3919 = const()[name = string("op_3919"), val = tensor([0, 2, 1, 3])]; + bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)]; + bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)]; + tensor transpose_327_perm_0 = const()[name = string("transpose_327_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_328_perm_0 = const()[name = string("transpose_328_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_328 = transpose(perm = transpose_328_perm_0, x = k_179_cast_fp16)[name = string("transpose_498")]; + tensor transpose_327 = transpose(perm = transpose_327_perm_0, x = q_143_cast_fp16)[name = string("transpose_499")]; + tensor qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_327, y = transpose_328)[name = string("qk_107_cast_fp16")]; + tensor var_3923_cast_fp16 = softmax(axis = var_3767, x = qk_107_cast_fp16)[name = string("op_3923_cast_fp16")]; + bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)]; + bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(false)]; + tensor v_179_cast_fp16 = transpose(perm = var_3919, x = var_3918_cast_fp16)[name = string("transpose_500")]; + tensor var_3925_cast_fp16 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = var_3923_cast_fp16, y = v_179_cast_fp16)[name = string("op_3925_cast_fp16")]; + tensor var_3926 = const()[name = string("op_3926"), val = tensor([0, 2, 1, 3])]; + tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, -1, 1280])]; + tensor var_3927_cast_fp16 = transpose(perm = var_3926, x = var_3925_cast_fp16)[name = string("transpose_497")]; + tensor x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3927_cast_fp16)[name = string("x_319_cast_fp16")]; + tensor var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935168512)))]; + tensor var_3932_to_fp16 = const()[name = string("op_3932_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938445376)))]; + tensor linear_141_cast_fp16 = linear(bias = var_3932_to_fp16, weight = var_3931_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")]; + tensor var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938448000)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938450624)))]; + tensor var_3939_cast_fp16 = layer_norm(axes = var_3939_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3939_cast_fp16")]; + tensor var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938453248)))]; + tensor var_3949_to_fp16 = const()[name = string("op_3949_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951560512)))]; + tensor linear_142_cast_fp16 = linear(bias = var_3949_to_fp16, weight = var_3948_to_fp16, x = var_3939_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")]; + tensor x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951570816)))]; + tensor var_3955_to_fp16 = const()[name = string("op_3955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964678080)))]; + tensor linear_143_cast_fp16 = linear(bias = var_3955_to_fp16, weight = var_3954_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")]; + tensor k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; + tensor k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_98)[name = string("k_cache_73_cast_fp16")]; + tensor v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; + tensor v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_99)[name = string("v_cache_73_cast_fp16")]; + tensor k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; + tensor k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")]; + tensor v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; + tensor v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")]; + int32 var_3978 = const()[name = string("op_3978"), val = int32(-1)]; + tensor var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964680704)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964683328)))]; + fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3996_cast_fp16 = layer_norm(axes = var_3996_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3996_cast_fp16")]; + tensor var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964685952)))]; + tensor var_4008_to_fp16 = const()[name = string("op_4008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967962816)))]; + tensor linear_144_cast_fp16 = linear(bias = var_4008_to_fp16, weight = var_4007_to_fp16, x = var_3996_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_4011_to_fp16 = const()[name = string("op_4011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967965440)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4011_to_fp16, x = var_3996_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor var_4015_to_fp16 = const()[name = string("op_4015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971242304)))]; + tensor var_4016_to_fp16 = const()[name = string("op_4016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974519168)))]; + tensor linear_146_cast_fp16 = linear(bias = var_4016_to_fp16, weight = var_4015_to_fp16, x = var_3996_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor var_4018_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_4018_shape_cast_fp16")]; + int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)]; + int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)]; + bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)]; + string var_4018_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4018_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)]; + tensor var_4018_shape_cast_fp16_to_uint16 = cast(dtype = var_4018_shape_cast_fp16_to_uint16_dtype_0, x = var_4018_shape_cast_fp16)[name = string("cast_354")]; + uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_4018_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")]; + string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_353")]; + int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; + tensor expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor([0])]; + tensor expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor([0])]; + tensor expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")]; + tensor concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor([18])]; + int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)]; + bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)]; + tensor concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")]; + tensor concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor([0])]; + tensor concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor([0])]; + tensor concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor([0])]; + int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)]; + bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)]; + tensor concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")]; + tensor k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_98)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = k_cache1)[name = string("coreml_update_state_100")]; + tensor v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_99)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = v_cache1)[name = string("coreml_update_state_101")]; + int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)]; + int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1280)]; + int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)]; + bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)]; + tensor concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")]; + tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_406, end_mask = var_4034_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4034_cast_fp16")]; + tensor var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_406, end_mask = var_4037_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4037_cast_fp16")]; + tensor concat_408x = const()[name = string("concat_408x"), val = tensor([1, -1, 20, 64])]; + tensor var_4047_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4047_cast_fp16")]; + tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_147_cast_fp16 = mul(x = var_4047_cast_fp16, y = const_232_to_fp16)[name = string("q_147_cast_fp16")]; + tensor concat_409x = const()[name = string("concat_409x"), val = tensor([1, -1, 20, 64])]; + tensor var_4054_cast_fp16 = reshape(shape = concat_409x, x = var_4034_cast_fp16)[name = string("op_4054_cast_fp16")]; + tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_185_cast_fp16 = mul(x = var_4054_cast_fp16, y = const_233_to_fp16)[name = string("k_185_cast_fp16")]; + tensor concat_410x = const()[name = string("concat_410x"), val = tensor([1, -1, 20, 64])]; + tensor var_4061_cast_fp16 = reshape(shape = concat_410x, x = var_4037_cast_fp16)[name = string("op_4061_cast_fp16")]; + tensor var_4062 = const()[name = string("op_4062"), val = tensor([0, 2, 1, 3])]; + bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)]; + bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)]; + tensor transpose_329_perm_0 = const()[name = string("transpose_329_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_330_perm_0 = const()[name = string("transpose_330_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_330 = transpose(perm = transpose_330_perm_0, x = k_185_cast_fp16)[name = string("transpose_494")]; + tensor transpose_329 = transpose(perm = transpose_329_perm_0, x = q_147_cast_fp16)[name = string("transpose_495")]; + tensor qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_329, y = transpose_330)[name = string("qk_109_cast_fp16")]; + int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)]; + int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)]; + bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)]; + tensor concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")]; + tensor var_4065_begin_0 = const()[name = string("op_4065_begin_0"), val = tensor([0, 0])]; + tensor var_4065_end_mask_0 = const()[name = string("op_4065_end_mask_0"), val = tensor([false, true])]; + tensor var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = concat_411, end_mask = var_4065_end_mask_0, x = mask_to_fp16)[name = string("op_4065_cast_fp16")]; + int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)]; + int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; + bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; + tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")]; + tensor var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor([0, 0])]; + tensor var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor([true, false])]; + tensor var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = concat_412, end_mask = var_4066_end_mask_0, x = var_4065_cast_fp16)[name = string("op_4066_cast_fp16")]; + tensor qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4066_cast_fp16)[name = string("qk_111_cast_fp16")]; + tensor var_4069_cast_fp16 = softmax(axis = var_3978, x = qk_111_cast_fp16)[name = string("op_4069_cast_fp16")]; + bool var_4071_transpose_x_0 = const()[name = string("op_4071_transpose_x_0"), val = bool(false)]; + bool var_4071_transpose_y_0 = const()[name = string("op_4071_transpose_y_0"), val = bool(false)]; + tensor v_185_cast_fp16 = transpose(perm = var_4062, x = var_4061_cast_fp16)[name = string("transpose_496")]; + tensor var_4071_cast_fp16 = matmul(transpose_x = var_4071_transpose_x_0, transpose_y = var_4071_transpose_y_0, x = var_4069_cast_fp16, y = v_185_cast_fp16)[name = string("op_4071_cast_fp16")]; + tensor var_4072 = const()[name = string("op_4072"), val = tensor([0, 2, 1, 3])]; + tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, -1, 1280])]; + tensor var_4073_cast_fp16 = transpose(perm = var_4072, x = var_4071_cast_fp16)[name = string("transpose_493")]; + tensor x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4073_cast_fp16)[name = string("x_331_cast_fp16")]; + tensor var_4077_to_fp16 = const()[name = string("op_4077_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974521792)))]; + tensor var_4078_to_fp16 = const()[name = string("op_4078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977798656)))]; + tensor linear_147_cast_fp16 = linear(bias = var_4078_to_fp16, weight = var_4077_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")]; + tensor var_4085_axes_0 = const()[name = string("op_4085_axes_0"), val = tensor([-1])]; + tensor blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977801280)))]; + tensor blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977803904)))]; + tensor var_4085_cast_fp16 = layer_norm(axes = var_4085_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4085_cast_fp16")]; + tensor var_4094_to_fp16 = const()[name = string("op_4094_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977806528)))]; + tensor var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981083392)))]; + tensor linear_148_cast_fp16 = linear(bias = var_4095_to_fp16, weight = var_4094_to_fp16, x = var_4085_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor concat_414 = const()[name = string("concat_414"), val = tensor([0, 0, 0])]; + tensor concat_415 = const()[name = string("concat_415"), val = tensor([0, 1500, 0])]; + tensor k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_416 = const()[name = string("concat_416"), val = tensor([0, 0, 0])]; + tensor concat_417 = const()[name = string("concat_417"), val = tensor([0, 1500, 0])]; + tensor v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 20, 64])]; + tensor var_4115_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4115_cast_fp16")]; + tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_151_cast_fp16 = mul(x = var_4115_cast_fp16, y = const_234_to_fp16)[name = string("q_151_cast_fp16")]; + tensor var_4121 = const()[name = string("op_4121"), val = tensor([1, 1500, 20, -1])]; + tensor var_4122_cast_fp16 = reshape(shape = var_4121, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4122_cast_fp16")]; + tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_189_cast_fp16 = mul(x = var_4122_cast_fp16, y = const_235_to_fp16)[name = string("k_189_cast_fp16")]; + tensor var_4128 = const()[name = string("op_4128"), val = tensor([1, 1500, 20, -1])]; + tensor var_4129_cast_fp16 = reshape(shape = var_4128, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4129_cast_fp16")]; + tensor var_4130 = const()[name = string("op_4130"), val = tensor([0, 2, 1, 3])]; + bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)]; + bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)]; + tensor transpose_331_perm_0 = const()[name = string("transpose_331_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_332_perm_0 = const()[name = string("transpose_332_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_332 = transpose(perm = transpose_332_perm_0, x = k_189_cast_fp16)[name = string("transpose_490")]; + tensor transpose_331 = transpose(perm = transpose_331_perm_0, x = q_151_cast_fp16)[name = string("transpose_491")]; + tensor qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_331, y = transpose_332)[name = string("qk_113_cast_fp16")]; + tensor var_4134_cast_fp16 = softmax(axis = var_3978, x = qk_113_cast_fp16)[name = string("op_4134_cast_fp16")]; + bool var_4136_transpose_x_0 = const()[name = string("op_4136_transpose_x_0"), val = bool(false)]; + bool var_4136_transpose_y_0 = const()[name = string("op_4136_transpose_y_0"), val = bool(false)]; + tensor v_189_cast_fp16 = transpose(perm = var_4130, x = var_4129_cast_fp16)[name = string("transpose_492")]; + tensor var_4136_cast_fp16 = matmul(transpose_x = var_4136_transpose_x_0, transpose_y = var_4136_transpose_y_0, x = var_4134_cast_fp16, y = v_189_cast_fp16)[name = string("op_4136_cast_fp16")]; + tensor var_4137 = const()[name = string("op_4137"), val = tensor([0, 2, 1, 3])]; + tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 1280])]; + tensor var_4138_cast_fp16 = transpose(perm = var_4137, x = var_4136_cast_fp16)[name = string("transpose_489")]; + tensor x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4138_cast_fp16)[name = string("x_337_cast_fp16")]; + tensor var_4142_to_fp16 = const()[name = string("op_4142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981086016)))]; + tensor var_4143_to_fp16 = const()[name = string("op_4143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984362880)))]; + tensor linear_149_cast_fp16 = linear(bias = var_4143_to_fp16, weight = var_4142_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")]; + tensor var_4150_axes_0 = const()[name = string("op_4150_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984365504)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984368128)))]; + tensor var_4150_cast_fp16 = layer_norm(axes = var_4150_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4150_cast_fp16")]; + tensor var_4159_to_fp16 = const()[name = string("op_4159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984370752)))]; + tensor var_4160_to_fp16 = const()[name = string("op_4160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997478016)))]; + tensor linear_150_cast_fp16 = linear(bias = var_4160_to_fp16, weight = var_4159_to_fp16, x = var_4150_cast_fp16)[name = string("linear_150_cast_fp16")]; + string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")]; + tensor x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")]; + tensor var_4165_to_fp16 = const()[name = string("op_4165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997488320)))]; + tensor var_4166_to_fp16 = const()[name = string("op_4166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010595584)))]; + tensor linear_151_cast_fp16 = linear(bias = var_4166_to_fp16, weight = var_4165_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")]; + tensor k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; + tensor k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_100)[name = string("k_cache_77_cast_fp16")]; + tensor v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; + tensor v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_101)[name = string("v_cache_77_cast_fp16")]; + tensor k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; + tensor k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")]; + tensor v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; + tensor v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")]; + int32 var_4189 = const()[name = string("op_4189"), val = int32(-1)]; + tensor var_4207_axes_0 = const()[name = string("op_4207_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010598208)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010600832)))]; + fp16 var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4207_cast_fp16 = layer_norm(axes = var_4207_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4207_cast_fp16")]; + tensor var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010603456)))]; + tensor var_4219_to_fp16 = const()[name = string("op_4219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013880320)))]; + tensor linear_152_cast_fp16 = linear(bias = var_4219_to_fp16, weight = var_4218_to_fp16, x = var_4207_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor var_4222_to_fp16 = const()[name = string("op_4222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013882944)))]; + tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4222_to_fp16, x = var_4207_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor var_4226_to_fp16 = const()[name = string("op_4226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017159808)))]; + tensor var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020436672)))]; + tensor linear_154_cast_fp16 = linear(bias = var_4227_to_fp16, weight = var_4226_to_fp16, x = var_4207_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor var_4229_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4229_shape_cast_fp16")]; + int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)]; + int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)]; + bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)]; + string var_4229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)]; + tensor var_4229_shape_cast_fp16_to_uint16 = cast(dtype = var_4229_shape_cast_fp16_to_uint16_dtype_0, x = var_4229_shape_cast_fp16)[name = string("cast_352")]; + uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4229_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")]; + string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_351")]; + int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([0])]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([0])]; + tensor expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor([0])]; + tensor expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")]; + tensor concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor([19])]; + int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)]; + bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)]; + tensor concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")]; + tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([0])]; + tensor concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor([0])]; + tensor concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor([0])]; + int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; + bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; + tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")]; + tensor k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_100)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = k_cache1)[name = string("coreml_update_state_102")]; + tensor v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_101)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = v_cache1)[name = string("coreml_update_state_103")]; + int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)]; + int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1280)]; + int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)]; + bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)]; + tensor concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")]; + tensor var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_428, end_mask = var_4245_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4245_cast_fp16")]; + tensor var_4248_begin_0 = const()[name = string("op_4248_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4248_end_mask_0 = const()[name = string("op_4248_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = concat_428, end_mask = var_4248_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4248_cast_fp16")]; + tensor concat_430x = const()[name = string("concat_430x"), val = tensor([1, -1, 20, 64])]; + tensor var_4258_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4258_cast_fp16")]; + tensor const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_155_cast_fp16 = mul(x = var_4258_cast_fp16, y = const_236_to_fp16)[name = string("q_155_cast_fp16")]; + tensor concat_431x = const()[name = string("concat_431x"), val = tensor([1, -1, 20, 64])]; + tensor var_4265_cast_fp16 = reshape(shape = concat_431x, x = var_4245_cast_fp16)[name = string("op_4265_cast_fp16")]; + tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_195_cast_fp16 = mul(x = var_4265_cast_fp16, y = const_237_to_fp16)[name = string("k_195_cast_fp16")]; + tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, -1, 20, 64])]; + tensor var_4272_cast_fp16 = reshape(shape = concat_432x, x = var_4248_cast_fp16)[name = string("op_4272_cast_fp16")]; + tensor var_4273 = const()[name = string("op_4273"), val = tensor([0, 2, 1, 3])]; + bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)]; + bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)]; + tensor transpose_333_perm_0 = const()[name = string("transpose_333_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_334_perm_0 = const()[name = string("transpose_334_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_334 = transpose(perm = transpose_334_perm_0, x = k_195_cast_fp16)[name = string("transpose_486")]; + tensor transpose_333 = transpose(perm = transpose_333_perm_0, x = q_155_cast_fp16)[name = string("transpose_487")]; + tensor qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_333, y = transpose_334)[name = string("qk_115_cast_fp16")]; + int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)]; + int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; + bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; + tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")]; + tensor var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor([0, 0])]; + tensor var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor([false, true])]; + tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = concat_433, end_mask = var_4276_end_mask_0, x = mask_to_fp16)[name = string("op_4276_cast_fp16")]; + int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)]; + int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; + bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; + tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")]; + tensor var_4277_begin_0 = const()[name = string("op_4277_begin_0"), val = tensor([0, 0])]; + tensor var_4277_end_mask_0 = const()[name = string("op_4277_end_mask_0"), val = tensor([true, false])]; + tensor var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = concat_434, end_mask = var_4277_end_mask_0, x = var_4276_cast_fp16)[name = string("op_4277_cast_fp16")]; + tensor qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4277_cast_fp16)[name = string("qk_117_cast_fp16")]; + tensor var_4280_cast_fp16 = softmax(axis = var_4189, x = qk_117_cast_fp16)[name = string("op_4280_cast_fp16")]; + bool var_4282_transpose_x_0 = const()[name = string("op_4282_transpose_x_0"), val = bool(false)]; + bool var_4282_transpose_y_0 = const()[name = string("op_4282_transpose_y_0"), val = bool(false)]; + tensor v_195_cast_fp16 = transpose(perm = var_4273, x = var_4272_cast_fp16)[name = string("transpose_488")]; + tensor var_4282_cast_fp16 = matmul(transpose_x = var_4282_transpose_x_0, transpose_y = var_4282_transpose_y_0, x = var_4280_cast_fp16, y = v_195_cast_fp16)[name = string("op_4282_cast_fp16")]; + tensor var_4283 = const()[name = string("op_4283"), val = tensor([0, 2, 1, 3])]; + tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, -1, 1280])]; + tensor var_4284_cast_fp16 = transpose(perm = var_4283, x = var_4282_cast_fp16)[name = string("transpose_485")]; + tensor x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4284_cast_fp16)[name = string("x_349_cast_fp16")]; + tensor var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020439296)))]; + tensor var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023716160)))]; + tensor linear_155_cast_fp16 = linear(bias = var_4289_to_fp16, weight = var_4288_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")]; + tensor var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor([-1])]; + tensor blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023718784)))]; + tensor blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023721408)))]; + tensor var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4296_cast_fp16")]; + tensor var_4305_to_fp16 = const()[name = string("op_4305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023724032)))]; + tensor var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027000896)))]; + tensor linear_156_cast_fp16 = linear(bias = var_4306_to_fp16, weight = var_4305_to_fp16, x = var_4296_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_436 = const()[name = string("concat_436"), val = tensor([0, 0, 0])]; + tensor concat_437 = const()[name = string("concat_437"), val = tensor([0, 1500, 0])]; + tensor k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_438 = const()[name = string("concat_438"), val = tensor([0, 0, 0])]; + tensor concat_439 = const()[name = string("concat_439"), val = tensor([0, 1500, 0])]; + tensor v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 20, 64])]; + tensor var_4326_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4326_cast_fp16")]; + tensor const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_159_cast_fp16 = mul(x = var_4326_cast_fp16, y = const_238_to_fp16)[name = string("q_159_cast_fp16")]; + tensor var_4332 = const()[name = string("op_4332"), val = tensor([1, 1500, 20, -1])]; + tensor var_4333_cast_fp16 = reshape(shape = var_4332, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4333_cast_fp16")]; + tensor const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_199_cast_fp16 = mul(x = var_4333_cast_fp16, y = const_239_to_fp16)[name = string("k_199_cast_fp16")]; + tensor var_4339 = const()[name = string("op_4339"), val = tensor([1, 1500, 20, -1])]; + tensor var_4340_cast_fp16 = reshape(shape = var_4339, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4340_cast_fp16")]; + tensor var_4341 = const()[name = string("op_4341"), val = tensor([0, 2, 1, 3])]; + bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)]; + bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)]; + tensor transpose_335_perm_0 = const()[name = string("transpose_335_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_336_perm_0 = const()[name = string("transpose_336_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_336 = transpose(perm = transpose_336_perm_0, x = k_199_cast_fp16)[name = string("transpose_482")]; + tensor transpose_335 = transpose(perm = transpose_335_perm_0, x = q_159_cast_fp16)[name = string("transpose_483")]; + tensor qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_335, y = transpose_336)[name = string("qk_119_cast_fp16")]; + tensor var_4345_cast_fp16 = softmax(axis = var_4189, x = qk_119_cast_fp16)[name = string("op_4345_cast_fp16")]; + bool var_4347_transpose_x_0 = const()[name = string("op_4347_transpose_x_0"), val = bool(false)]; + bool var_4347_transpose_y_0 = const()[name = string("op_4347_transpose_y_0"), val = bool(false)]; + tensor v_199_cast_fp16 = transpose(perm = var_4341, x = var_4340_cast_fp16)[name = string("transpose_484")]; + tensor var_4347_cast_fp16 = matmul(transpose_x = var_4347_transpose_x_0, transpose_y = var_4347_transpose_y_0, x = var_4345_cast_fp16, y = v_199_cast_fp16)[name = string("op_4347_cast_fp16")]; + tensor var_4348 = const()[name = string("op_4348"), val = tensor([0, 2, 1, 3])]; + tensor concat_441x = const()[name = string("concat_441x"), val = tensor([1, -1, 1280])]; + tensor var_4349_cast_fp16 = transpose(perm = var_4348, x = var_4347_cast_fp16)[name = string("transpose_481")]; + tensor x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4349_cast_fp16)[name = string("x_355_cast_fp16")]; + tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027003520)))]; + tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030280384)))]; + tensor linear_157_cast_fp16 = linear(bias = var_4354_to_fp16, weight = var_4353_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030283008)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030285632)))]; + tensor var_4361_cast_fp16 = layer_norm(axes = var_4361_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4361_cast_fp16")]; + tensor var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030288256)))]; + tensor var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043395520)))]; + tensor linear_158_cast_fp16 = linear(bias = var_4371_to_fp16, weight = var_4370_to_fp16, x = var_4361_cast_fp16)[name = string("linear_158_cast_fp16")]; + string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")]; + tensor x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043405824)))]; + tensor var_4377_to_fp16 = const()[name = string("op_4377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056513088)))]; + tensor linear_159_cast_fp16 = linear(bias = var_4377_to_fp16, weight = var_4376_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")]; + tensor k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; + tensor k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_102)[name = string("k_cache_81_cast_fp16")]; + tensor v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; + tensor v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_103)[name = string("v_cache_81_cast_fp16")]; + tensor k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; + tensor k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")]; + tensor v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; + tensor v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")]; + int32 var_4400 = const()[name = string("op_4400"), val = int32(-1)]; + tensor var_4418_axes_0 = const()[name = string("op_4418_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056515712)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056518336)))]; + fp16 var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4418_cast_fp16 = layer_norm(axes = var_4418_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4418_cast_fp16")]; + tensor var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056520960)))]; + tensor var_4430_to_fp16 = const()[name = string("op_4430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059797824)))]; + tensor linear_160_cast_fp16 = linear(bias = var_4430_to_fp16, weight = var_4429_to_fp16, x = var_4418_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor var_4433_to_fp16 = const()[name = string("op_4433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059800448)))]; + tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4433_to_fp16, x = var_4418_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor var_4437_to_fp16 = const()[name = string("op_4437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1063077312)))]; + tensor var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066354176)))]; + tensor linear_162_cast_fp16 = linear(bias = var_4438_to_fp16, weight = var_4437_to_fp16, x = var_4418_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor var_4440_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4440_shape_cast_fp16")]; + int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)]; + int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)]; + bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)]; + string var_4440_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4440_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)]; + tensor var_4440_shape_cast_fp16_to_uint16 = cast(dtype = var_4440_shape_cast_fp16_to_uint16_dtype_0, x = var_4440_shape_cast_fp16)[name = string("cast_350")]; + uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4440_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")]; + string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_349")]; + int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")]; + tensor expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor([0])]; + tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([0])]; + tensor expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor([0])]; + tensor expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")]; + tensor concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor([20])]; + int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)]; + bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)]; + tensor concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")]; + tensor concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor([0])]; + tensor concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor([0])]; + tensor concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor([0])]; + int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)]; + bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)]; + tensor concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")]; + tensor k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_102)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = k_cache1)[name = string("coreml_update_state_104")]; + tensor v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_103)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = v_cache1)[name = string("coreml_update_state_105")]; + int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)]; + int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1280)]; + int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; + bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; + tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")]; + tensor var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_450, end_mask = var_4456_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4456_cast_fp16")]; + tensor var_4459_begin_0 = const()[name = string("op_4459_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4459_end_mask_0 = const()[name = string("op_4459_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4459_cast_fp16 = slice_by_index(begin = var_4459_begin_0, end = concat_450, end_mask = var_4459_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4459_cast_fp16")]; + tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, -1, 20, 64])]; + tensor var_4469_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4469_cast_fp16")]; + tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_163_cast_fp16 = mul(x = var_4469_cast_fp16, y = const_240_to_fp16)[name = string("q_163_cast_fp16")]; + tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, -1, 20, 64])]; + tensor var_4476_cast_fp16 = reshape(shape = concat_453x, x = var_4456_cast_fp16)[name = string("op_4476_cast_fp16")]; + tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_205_cast_fp16 = mul(x = var_4476_cast_fp16, y = const_241_to_fp16)[name = string("k_205_cast_fp16")]; + tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, -1, 20, 64])]; + tensor var_4483_cast_fp16 = reshape(shape = concat_454x, x = var_4459_cast_fp16)[name = string("op_4483_cast_fp16")]; + tensor var_4484 = const()[name = string("op_4484"), val = tensor([0, 2, 1, 3])]; + bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)]; + bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)]; + tensor transpose_337_perm_0 = const()[name = string("transpose_337_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_338_perm_0 = const()[name = string("transpose_338_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_338 = transpose(perm = transpose_338_perm_0, x = k_205_cast_fp16)[name = string("transpose_478")]; + tensor transpose_337 = transpose(perm = transpose_337_perm_0, x = q_163_cast_fp16)[name = string("transpose_479")]; + tensor qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_337, y = transpose_338)[name = string("qk_121_cast_fp16")]; + int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)]; + int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; + bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; + tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")]; + tensor var_4487_begin_0 = const()[name = string("op_4487_begin_0"), val = tensor([0, 0])]; + tensor var_4487_end_mask_0 = const()[name = string("op_4487_end_mask_0"), val = tensor([false, true])]; + tensor var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = concat_455, end_mask = var_4487_end_mask_0, x = mask_to_fp16)[name = string("op_4487_cast_fp16")]; + int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)]; + int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; + bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; + tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")]; + tensor var_4488_begin_0 = const()[name = string("op_4488_begin_0"), val = tensor([0, 0])]; + tensor var_4488_end_mask_0 = const()[name = string("op_4488_end_mask_0"), val = tensor([true, false])]; + tensor var_4488_cast_fp16 = slice_by_index(begin = var_4488_begin_0, end = concat_456, end_mask = var_4488_end_mask_0, x = var_4487_cast_fp16)[name = string("op_4488_cast_fp16")]; + tensor qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4488_cast_fp16)[name = string("qk_123_cast_fp16")]; + tensor var_4491_cast_fp16 = softmax(axis = var_4400, x = qk_123_cast_fp16)[name = string("op_4491_cast_fp16")]; + bool var_4493_transpose_x_0 = const()[name = string("op_4493_transpose_x_0"), val = bool(false)]; + bool var_4493_transpose_y_0 = const()[name = string("op_4493_transpose_y_0"), val = bool(false)]; + tensor v_205_cast_fp16 = transpose(perm = var_4484, x = var_4483_cast_fp16)[name = string("transpose_480")]; + tensor var_4493_cast_fp16 = matmul(transpose_x = var_4493_transpose_x_0, transpose_y = var_4493_transpose_y_0, x = var_4491_cast_fp16, y = v_205_cast_fp16)[name = string("op_4493_cast_fp16")]; + tensor var_4494 = const()[name = string("op_4494"), val = tensor([0, 2, 1, 3])]; + tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 1280])]; + tensor var_4495_cast_fp16 = transpose(perm = var_4494, x = var_4493_cast_fp16)[name = string("transpose_477")]; + tensor x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4495_cast_fp16)[name = string("x_367_cast_fp16")]; + tensor var_4499_to_fp16 = const()[name = string("op_4499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066356800)))]; + tensor var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069633664)))]; + tensor linear_163_cast_fp16 = linear(bias = var_4500_to_fp16, weight = var_4499_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")]; + tensor var_4507_axes_0 = const()[name = string("op_4507_axes_0"), val = tensor([-1])]; + tensor blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069636288)))]; + tensor blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069638912)))]; + tensor var_4507_cast_fp16 = layer_norm(axes = var_4507_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4507_cast_fp16")]; + tensor var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069641536)))]; + tensor var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072918400)))]; + tensor linear_164_cast_fp16 = linear(bias = var_4517_to_fp16, weight = var_4516_to_fp16, x = var_4507_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor concat_458 = const()[name = string("concat_458"), val = tensor([0, 0, 0])]; + tensor concat_459 = const()[name = string("concat_459"), val = tensor([0, 1500, 0])]; + tensor k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_460 = const()[name = string("concat_460"), val = tensor([0, 0, 0])]; + tensor concat_461 = const()[name = string("concat_461"), val = tensor([0, 1500, 0])]; + tensor v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_462x = const()[name = string("concat_462x"), val = tensor([1, -1, 20, 64])]; + tensor var_4537_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4537_cast_fp16")]; + tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_167_cast_fp16 = mul(x = var_4537_cast_fp16, y = const_242_to_fp16)[name = string("q_167_cast_fp16")]; + tensor var_4543 = const()[name = string("op_4543"), val = tensor([1, 1500, 20, -1])]; + tensor var_4544_cast_fp16 = reshape(shape = var_4543, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4544_cast_fp16")]; + tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_209_cast_fp16 = mul(x = var_4544_cast_fp16, y = const_243_to_fp16)[name = string("k_209_cast_fp16")]; + tensor var_4550 = const()[name = string("op_4550"), val = tensor([1, 1500, 20, -1])]; + tensor var_4551_cast_fp16 = reshape(shape = var_4550, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4551_cast_fp16")]; + tensor var_4552 = const()[name = string("op_4552"), val = tensor([0, 2, 1, 3])]; + bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)]; + bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)]; + tensor transpose_339_perm_0 = const()[name = string("transpose_339_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_340_perm_0 = const()[name = string("transpose_340_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_340 = transpose(perm = transpose_340_perm_0, x = k_209_cast_fp16)[name = string("transpose_474")]; + tensor transpose_339 = transpose(perm = transpose_339_perm_0, x = q_167_cast_fp16)[name = string("transpose_475")]; + tensor qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_339, y = transpose_340)[name = string("qk_125_cast_fp16")]; + tensor var_4556_cast_fp16 = softmax(axis = var_4400, x = qk_125_cast_fp16)[name = string("op_4556_cast_fp16")]; + bool var_4558_transpose_x_0 = const()[name = string("op_4558_transpose_x_0"), val = bool(false)]; + bool var_4558_transpose_y_0 = const()[name = string("op_4558_transpose_y_0"), val = bool(false)]; + tensor v_209_cast_fp16 = transpose(perm = var_4552, x = var_4551_cast_fp16)[name = string("transpose_476")]; + tensor var_4558_cast_fp16 = matmul(transpose_x = var_4558_transpose_x_0, transpose_y = var_4558_transpose_y_0, x = var_4556_cast_fp16, y = v_209_cast_fp16)[name = string("op_4558_cast_fp16")]; + tensor var_4559 = const()[name = string("op_4559"), val = tensor([0, 2, 1, 3])]; + tensor concat_463x = const()[name = string("concat_463x"), val = tensor([1, -1, 1280])]; + tensor var_4560_cast_fp16 = transpose(perm = var_4559, x = var_4558_cast_fp16)[name = string("transpose_473")]; + tensor x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4560_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_4564_to_fp16 = const()[name = string("op_4564_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072921024)))]; + tensor var_4565_to_fp16 = const()[name = string("op_4565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076197888)))]; + tensor linear_165_cast_fp16 = linear(bias = var_4565_to_fp16, weight = var_4564_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")]; + tensor var_4572_axes_0 = const()[name = string("op_4572_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076200512)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076203136)))]; + tensor var_4572_cast_fp16 = layer_norm(axes = var_4572_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4572_cast_fp16")]; + tensor var_4581_to_fp16 = const()[name = string("op_4581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076205760)))]; + tensor var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089313024)))]; + tensor linear_166_cast_fp16 = linear(bias = var_4582_to_fp16, weight = var_4581_to_fp16, x = var_4572_cast_fp16)[name = string("linear_166_cast_fp16")]; + string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")]; + tensor x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")]; + tensor var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089323328)))]; + tensor var_4588_to_fp16 = const()[name = string("op_4588_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102430592)))]; + tensor linear_167_cast_fp16 = linear(bias = var_4588_to_fp16, weight = var_4587_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")]; + tensor k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; + tensor k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_104)[name = string("k_cache_85_cast_fp16")]; + tensor v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; + tensor v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_105)[name = string("v_cache_85_cast_fp16")]; + tensor k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; + tensor k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")]; + tensor v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; + tensor v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")]; + int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)]; + tensor var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102433216)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102435840)))]; + fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4629_cast_fp16 = layer_norm(axes = var_4629_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4629_cast_fp16")]; + tensor var_4640_to_fp16 = const()[name = string("op_4640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102438464)))]; + tensor var_4641_to_fp16 = const()[name = string("op_4641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105715328)))]; + tensor linear_168_cast_fp16 = linear(bias = var_4641_to_fp16, weight = var_4640_to_fp16, x = var_4629_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor var_4644_to_fp16 = const()[name = string("op_4644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105717952)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4644_to_fp16, x = var_4629_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor var_4648_to_fp16 = const()[name = string("op_4648_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108994816)))]; + tensor var_4649_to_fp16 = const()[name = string("op_4649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112271680)))]; + tensor linear_170_cast_fp16 = linear(bias = var_4649_to_fp16, weight = var_4648_to_fp16, x = var_4629_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor var_4651_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4651_shape_cast_fp16")]; + int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)]; + int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)]; + bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)]; + string var_4651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)]; + tensor var_4651_shape_cast_fp16_to_uint16 = cast(dtype = var_4651_shape_cast_fp16_to_uint16_dtype_0, x = var_4651_shape_cast_fp16)[name = string("cast_348")]; + uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4651_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")]; + string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_347")]; + int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")]; + tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; + tensor expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor([0])]; + tensor expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor([0])]; + tensor expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")]; + tensor concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor([21])]; + int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)]; + bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)]; + tensor concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")]; + tensor concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor([0])]; + tensor concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor([0])]; + tensor concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor([0])]; + int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)]; + bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)]; + tensor concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")]; + tensor k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_104)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = k_cache1)[name = string("coreml_update_state_106")]; + tensor v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_105)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = v_cache1)[name = string("coreml_update_state_107")]; + int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)]; + int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1280)]; + int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; + bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; + tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")]; + tensor var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_472, end_mask = var_4667_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4667_cast_fp16")]; + tensor var_4670_begin_0 = const()[name = string("op_4670_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = concat_472, end_mask = var_4670_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4670_cast_fp16")]; + tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 20, 64])]; + tensor var_4680_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4680_cast_fp16")]; + tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_171_cast_fp16 = mul(x = var_4680_cast_fp16, y = const_244_to_fp16)[name = string("q_171_cast_fp16")]; + tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 20, 64])]; + tensor var_4687_cast_fp16 = reshape(shape = concat_475x, x = var_4667_cast_fp16)[name = string("op_4687_cast_fp16")]; + tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_215_cast_fp16 = mul(x = var_4687_cast_fp16, y = const_245_to_fp16)[name = string("k_215_cast_fp16")]; + tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 20, 64])]; + tensor var_4694_cast_fp16 = reshape(shape = concat_476x, x = var_4670_cast_fp16)[name = string("op_4694_cast_fp16")]; + tensor var_4695 = const()[name = string("op_4695"), val = tensor([0, 2, 1, 3])]; + bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)]; + bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)]; + tensor transpose_341_perm_0 = const()[name = string("transpose_341_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_342_perm_0 = const()[name = string("transpose_342_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_342 = transpose(perm = transpose_342_perm_0, x = k_215_cast_fp16)[name = string("transpose_470")]; + tensor transpose_341 = transpose(perm = transpose_341_perm_0, x = q_171_cast_fp16)[name = string("transpose_471")]; + tensor qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_341, y = transpose_342)[name = string("qk_127_cast_fp16")]; + int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)]; + int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)]; + bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)]; + tensor concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")]; + tensor var_4698_begin_0 = const()[name = string("op_4698_begin_0"), val = tensor([0, 0])]; + tensor var_4698_end_mask_0 = const()[name = string("op_4698_end_mask_0"), val = tensor([false, true])]; + tensor var_4698_cast_fp16 = slice_by_index(begin = var_4698_begin_0, end = concat_477, end_mask = var_4698_end_mask_0, x = mask_to_fp16)[name = string("op_4698_cast_fp16")]; + int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)]; + int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)]; + bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)]; + tensor concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")]; + tensor var_4699_begin_0 = const()[name = string("op_4699_begin_0"), val = tensor([0, 0])]; + tensor var_4699_end_mask_0 = const()[name = string("op_4699_end_mask_0"), val = tensor([true, false])]; + tensor var_4699_cast_fp16 = slice_by_index(begin = var_4699_begin_0, end = concat_478, end_mask = var_4699_end_mask_0, x = var_4698_cast_fp16)[name = string("op_4699_cast_fp16")]; + tensor qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4699_cast_fp16)[name = string("qk_129_cast_fp16")]; + tensor var_4702_cast_fp16 = softmax(axis = var_4611, x = qk_129_cast_fp16)[name = string("op_4702_cast_fp16")]; + bool var_4704_transpose_x_0 = const()[name = string("op_4704_transpose_x_0"), val = bool(false)]; + bool var_4704_transpose_y_0 = const()[name = string("op_4704_transpose_y_0"), val = bool(false)]; + tensor v_215_cast_fp16 = transpose(perm = var_4695, x = var_4694_cast_fp16)[name = string("transpose_472")]; + tensor var_4704_cast_fp16 = matmul(transpose_x = var_4704_transpose_x_0, transpose_y = var_4704_transpose_y_0, x = var_4702_cast_fp16, y = v_215_cast_fp16)[name = string("op_4704_cast_fp16")]; + tensor var_4705 = const()[name = string("op_4705"), val = tensor([0, 2, 1, 3])]; + tensor concat_479x = const()[name = string("concat_479x"), val = tensor([1, -1, 1280])]; + tensor var_4706_cast_fp16 = transpose(perm = var_4705, x = var_4704_cast_fp16)[name = string("transpose_469")]; + tensor x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4706_cast_fp16)[name = string("x_385_cast_fp16")]; + tensor var_4710_to_fp16 = const()[name = string("op_4710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112274304)))]; + tensor var_4711_to_fp16 = const()[name = string("op_4711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115551168)))]; + tensor linear_171_cast_fp16 = linear(bias = var_4711_to_fp16, weight = var_4710_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")]; + tensor var_4718_axes_0 = const()[name = string("op_4718_axes_0"), val = tensor([-1])]; + tensor blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115553792)))]; + tensor blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115556416)))]; + tensor var_4718_cast_fp16 = layer_norm(axes = var_4718_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4718_cast_fp16")]; + tensor var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115559040)))]; + tensor var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118835904)))]; + tensor linear_172_cast_fp16 = linear(bias = var_4728_to_fp16, weight = var_4727_to_fp16, x = var_4718_cast_fp16)[name = string("linear_172_cast_fp16")]; + tensor concat_480 = const()[name = string("concat_480"), val = tensor([0, 0, 0])]; + tensor concat_481 = const()[name = string("concat_481"), val = tensor([0, 1500, 0])]; + tensor k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_482 = const()[name = string("concat_482"), val = tensor([0, 0, 0])]; + tensor concat_483 = const()[name = string("concat_483"), val = tensor([0, 1500, 0])]; + tensor v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_484x = const()[name = string("concat_484x"), val = tensor([1, -1, 20, 64])]; + tensor var_4748_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4748_cast_fp16")]; + tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_175_cast_fp16 = mul(x = var_4748_cast_fp16, y = const_246_to_fp16)[name = string("q_175_cast_fp16")]; + tensor var_4754 = const()[name = string("op_4754"), val = tensor([1, 1500, 20, -1])]; + tensor var_4755_cast_fp16 = reshape(shape = var_4754, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4755_cast_fp16")]; + tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_219_cast_fp16 = mul(x = var_4755_cast_fp16, y = const_247_to_fp16)[name = string("k_219_cast_fp16")]; + tensor var_4761 = const()[name = string("op_4761"), val = tensor([1, 1500, 20, -1])]; + tensor var_4762_cast_fp16 = reshape(shape = var_4761, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4762_cast_fp16")]; + tensor var_4763 = const()[name = string("op_4763"), val = tensor([0, 2, 1, 3])]; + bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)]; + bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)]; + tensor transpose_343_perm_0 = const()[name = string("transpose_343_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_344_perm_0 = const()[name = string("transpose_344_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_344 = transpose(perm = transpose_344_perm_0, x = k_219_cast_fp16)[name = string("transpose_466")]; + tensor transpose_343 = transpose(perm = transpose_343_perm_0, x = q_175_cast_fp16)[name = string("transpose_467")]; + tensor qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_343, y = transpose_344)[name = string("qk_131_cast_fp16")]; + tensor var_4767_cast_fp16 = softmax(axis = var_4611, x = qk_131_cast_fp16)[name = string("op_4767_cast_fp16")]; + bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)]; + bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)]; + tensor v_219_cast_fp16 = transpose(perm = var_4763, x = var_4762_cast_fp16)[name = string("transpose_468")]; + tensor var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4767_cast_fp16, y = v_219_cast_fp16)[name = string("op_4769_cast_fp16")]; + tensor var_4770 = const()[name = string("op_4770"), val = tensor([0, 2, 1, 3])]; + tensor concat_485x = const()[name = string("concat_485x"), val = tensor([1, -1, 1280])]; + tensor var_4771_cast_fp16 = transpose(perm = var_4770, x = var_4769_cast_fp16)[name = string("transpose_465")]; + tensor x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4771_cast_fp16)[name = string("x_391_cast_fp16")]; + tensor var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118838528)))]; + tensor var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122115392)))]; + tensor linear_173_cast_fp16 = linear(bias = var_4776_to_fp16, weight = var_4775_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")]; + tensor var_4783_axes_0 = const()[name = string("op_4783_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122118016)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122120640)))]; + tensor var_4783_cast_fp16 = layer_norm(axes = var_4783_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4783_cast_fp16")]; + tensor var_4792_to_fp16 = const()[name = string("op_4792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122123264)))]; + tensor var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135230528)))]; + tensor linear_174_cast_fp16 = linear(bias = var_4793_to_fp16, weight = var_4792_to_fp16, x = var_4783_cast_fp16)[name = string("linear_174_cast_fp16")]; + string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")]; + tensor x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")]; + tensor var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135240832)))]; + tensor var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148348096)))]; + tensor linear_175_cast_fp16 = linear(bias = var_4799_to_fp16, weight = var_4798_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")]; + tensor k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; + tensor k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_106)[name = string("k_cache_89_cast_fp16")]; + tensor v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; + tensor v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_107)[name = string("v_cache_89_cast_fp16")]; + tensor k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; + tensor k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")]; + tensor v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; + tensor v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")]; + int32 var_4822 = const()[name = string("op_4822"), val = int32(-1)]; + tensor var_4840_axes_0 = const()[name = string("op_4840_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148350720)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148353344)))]; + fp16 var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4840_cast_fp16 = layer_norm(axes = var_4840_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4840_cast_fp16")]; + tensor var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148355968)))]; + tensor var_4852_to_fp16 = const()[name = string("op_4852_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151632832)))]; + tensor linear_176_cast_fp16 = linear(bias = var_4852_to_fp16, weight = var_4851_to_fp16, x = var_4840_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor var_4855_to_fp16 = const()[name = string("op_4855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151635456)))]; + tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4855_to_fp16, x = var_4840_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154912320)))]; + tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158189184)))]; + tensor linear_178_cast_fp16 = linear(bias = var_4860_to_fp16, weight = var_4859_to_fp16, x = var_4840_cast_fp16)[name = string("linear_178_cast_fp16")]; + tensor var_4862_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4862_shape_cast_fp16")]; + int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; + int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; + bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; + string var_4862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)]; + tensor var_4862_shape_cast_fp16_to_uint16 = cast(dtype = var_4862_shape_cast_fp16_to_uint16_dtype_0, x = var_4862_shape_cast_fp16)[name = string("cast_346")]; + uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4862_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; + string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_345")]; + int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")]; + tensor expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor([0])]; + tensor expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor([0])]; + tensor expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor([0])]; + tensor expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")]; + tensor concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor([22])]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")]; + tensor concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor([0])]; + tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; + tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; + int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; + bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; + tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")]; + tensor k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_106)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = k_cache1)[name = string("coreml_update_state_108")]; + tensor v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_107)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = v_cache1)[name = string("coreml_update_state_109")]; + int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)]; + int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1280)]; + int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; + bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; + tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")]; + tensor var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_494, end_mask = var_4878_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4878_cast_fp16")]; + tensor var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = concat_494, end_mask = var_4881_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4881_cast_fp16")]; + tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 20, 64])]; + tensor var_4891_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4891_cast_fp16")]; + tensor const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_179_cast_fp16 = mul(x = var_4891_cast_fp16, y = const_248_to_fp16)[name = string("q_179_cast_fp16")]; + tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 20, 64])]; + tensor var_4898_cast_fp16 = reshape(shape = concat_497x, x = var_4878_cast_fp16)[name = string("op_4898_cast_fp16")]; + tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_225_cast_fp16 = mul(x = var_4898_cast_fp16, y = const_249_to_fp16)[name = string("k_225_cast_fp16")]; + tensor concat_498x = const()[name = string("concat_498x"), val = tensor([1, -1, 20, 64])]; + tensor var_4905_cast_fp16 = reshape(shape = concat_498x, x = var_4881_cast_fp16)[name = string("op_4905_cast_fp16")]; + tensor var_4906 = const()[name = string("op_4906"), val = tensor([0, 2, 1, 3])]; + bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)]; + bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)]; + tensor transpose_345_perm_0 = const()[name = string("transpose_345_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_346_perm_0 = const()[name = string("transpose_346_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_346 = transpose(perm = transpose_346_perm_0, x = k_225_cast_fp16)[name = string("transpose_462")]; + tensor transpose_345 = transpose(perm = transpose_345_perm_0, x = q_179_cast_fp16)[name = string("transpose_463")]; + tensor qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_345, y = transpose_346)[name = string("qk_133_cast_fp16")]; + int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)]; + int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; + bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; + tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")]; + tensor var_4909_begin_0 = const()[name = string("op_4909_begin_0"), val = tensor([0, 0])]; + tensor var_4909_end_mask_0 = const()[name = string("op_4909_end_mask_0"), val = tensor([false, true])]; + tensor var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = concat_499, end_mask = var_4909_end_mask_0, x = mask_to_fp16)[name = string("op_4909_cast_fp16")]; + int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)]; + int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; + bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; + tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")]; + tensor var_4910_begin_0 = const()[name = string("op_4910_begin_0"), val = tensor([0, 0])]; + tensor var_4910_end_mask_0 = const()[name = string("op_4910_end_mask_0"), val = tensor([true, false])]; + tensor var_4910_cast_fp16 = slice_by_index(begin = var_4910_begin_0, end = concat_500, end_mask = var_4910_end_mask_0, x = var_4909_cast_fp16)[name = string("op_4910_cast_fp16")]; + tensor qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4910_cast_fp16)[name = string("qk_135_cast_fp16")]; + tensor var_4913_cast_fp16 = softmax(axis = var_4822, x = qk_135_cast_fp16)[name = string("op_4913_cast_fp16")]; + bool var_4915_transpose_x_0 = const()[name = string("op_4915_transpose_x_0"), val = bool(false)]; + bool var_4915_transpose_y_0 = const()[name = string("op_4915_transpose_y_0"), val = bool(false)]; + tensor v_225_cast_fp16 = transpose(perm = var_4906, x = var_4905_cast_fp16)[name = string("transpose_464")]; + tensor var_4915_cast_fp16 = matmul(transpose_x = var_4915_transpose_x_0, transpose_y = var_4915_transpose_y_0, x = var_4913_cast_fp16, y = v_225_cast_fp16)[name = string("op_4915_cast_fp16")]; + tensor var_4916 = const()[name = string("op_4916"), val = tensor([0, 2, 1, 3])]; + tensor concat_501x = const()[name = string("concat_501x"), val = tensor([1, -1, 1280])]; + tensor var_4917_cast_fp16 = transpose(perm = var_4916, x = var_4915_cast_fp16)[name = string("transpose_461")]; + tensor x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4917_cast_fp16)[name = string("x_403_cast_fp16")]; + tensor var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158191808)))]; + tensor var_4922_to_fp16 = const()[name = string("op_4922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161468672)))]; + tensor linear_179_cast_fp16 = linear(bias = var_4922_to_fp16, weight = var_4921_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_4929_axes_0 = const()[name = string("op_4929_axes_0"), val = tensor([-1])]; + tensor blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161471296)))]; + tensor blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161473920)))]; + tensor var_4929_cast_fp16 = layer_norm(axes = var_4929_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4929_cast_fp16")]; + tensor var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161476544)))]; + tensor var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164753408)))]; + tensor linear_180_cast_fp16 = linear(bias = var_4939_to_fp16, weight = var_4938_to_fp16, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor concat_502 = const()[name = string("concat_502"), val = tensor([0, 0, 0])]; + tensor concat_503 = const()[name = string("concat_503"), val = tensor([0, 1500, 0])]; + tensor k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_504 = const()[name = string("concat_504"), val = tensor([0, 0, 0])]; + tensor concat_505 = const()[name = string("concat_505"), val = tensor([0, 1500, 0])]; + tensor v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_506x = const()[name = string("concat_506x"), val = tensor([1, -1, 20, 64])]; + tensor var_4959_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4959_cast_fp16")]; + tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_183_cast_fp16 = mul(x = var_4959_cast_fp16, y = const_250_to_fp16)[name = string("q_183_cast_fp16")]; + tensor var_4965 = const()[name = string("op_4965"), val = tensor([1, 1500, 20, -1])]; + tensor var_4966_cast_fp16 = reshape(shape = var_4965, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4966_cast_fp16")]; + tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_229_cast_fp16 = mul(x = var_4966_cast_fp16, y = const_251_to_fp16)[name = string("k_229_cast_fp16")]; + tensor var_4972 = const()[name = string("op_4972"), val = tensor([1, 1500, 20, -1])]; + tensor var_4973_cast_fp16 = reshape(shape = var_4972, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4973_cast_fp16")]; + tensor var_4974 = const()[name = string("op_4974"), val = tensor([0, 2, 1, 3])]; + bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)]; + bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)]; + tensor transpose_347_perm_0 = const()[name = string("transpose_347_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_348_perm_0 = const()[name = string("transpose_348_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_348 = transpose(perm = transpose_348_perm_0, x = k_229_cast_fp16)[name = string("transpose_458")]; + tensor transpose_347 = transpose(perm = transpose_347_perm_0, x = q_183_cast_fp16)[name = string("transpose_459")]; + tensor qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_347, y = transpose_348)[name = string("qk_137_cast_fp16")]; + tensor var_4978_cast_fp16 = softmax(axis = var_4822, x = qk_137_cast_fp16)[name = string("op_4978_cast_fp16")]; + bool var_4980_transpose_x_0 = const()[name = string("op_4980_transpose_x_0"), val = bool(false)]; + bool var_4980_transpose_y_0 = const()[name = string("op_4980_transpose_y_0"), val = bool(false)]; + tensor v_229_cast_fp16 = transpose(perm = var_4974, x = var_4973_cast_fp16)[name = string("transpose_460")]; + tensor var_4980_cast_fp16 = matmul(transpose_x = var_4980_transpose_x_0, transpose_y = var_4980_transpose_y_0, x = var_4978_cast_fp16, y = v_229_cast_fp16)[name = string("op_4980_cast_fp16")]; + tensor var_4981 = const()[name = string("op_4981"), val = tensor([0, 2, 1, 3])]; + tensor concat_507x = const()[name = string("concat_507x"), val = tensor([1, -1, 1280])]; + tensor var_4982_cast_fp16 = transpose(perm = var_4981, x = var_4980_cast_fp16)[name = string("transpose_457")]; + tensor x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4982_cast_fp16)[name = string("x_409_cast_fp16")]; + tensor var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164756032)))]; + tensor var_4987_to_fp16 = const()[name = string("op_4987_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168032896)))]; + tensor linear_181_cast_fp16 = linear(bias = var_4987_to_fp16, weight = var_4986_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")]; + tensor var_4994_axes_0 = const()[name = string("op_4994_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168035520)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168038144)))]; + tensor var_4994_cast_fp16 = layer_norm(axes = var_4994_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4994_cast_fp16")]; + tensor var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168040768)))]; + tensor var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181148032)))]; + tensor linear_182_cast_fp16 = linear(bias = var_5004_to_fp16, weight = var_5003_to_fp16, x = var_4994_cast_fp16)[name = string("linear_182_cast_fp16")]; + string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")]; + tensor x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")]; + tensor var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181158336)))]; + tensor var_5010_to_fp16 = const()[name = string("op_5010_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194265600)))]; + tensor linear_183_cast_fp16 = linear(bias = var_5010_to_fp16, weight = var_5009_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")]; + tensor k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; + tensor k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_108)[name = string("k_cache_93_cast_fp16")]; + tensor v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; + tensor v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_109)[name = string("v_cache_93_cast_fp16")]; + tensor k_cache_95_begin_0 = const()[name = string("k_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_95_end_0 = const()[name = string("k_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; + tensor k_cache_95_end_mask_0 = const()[name = string("k_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_95_squeeze_mask_0 = const()[name = string("k_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_95_cast_fp16 = slice_by_index(begin = k_cache_95_begin_0, end = k_cache_95_end_0, end_mask = k_cache_95_end_mask_0, squeeze_mask = k_cache_95_squeeze_mask_0, x = read_state_2)[name = string("k_cache_95_cast_fp16")]; + tensor v_cache_95_begin_0 = const()[name = string("v_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_95_end_0 = const()[name = string("v_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; + tensor v_cache_95_end_mask_0 = const()[name = string("v_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_95_squeeze_mask_0 = const()[name = string("v_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_95_cast_fp16 = slice_by_index(begin = v_cache_95_begin_0, end = v_cache_95_end_0, end_mask = v_cache_95_end_mask_0, squeeze_mask = v_cache_95_squeeze_mask_0, x = read_state_3)[name = string("v_cache_95_cast_fp16")]; + int32 var_5033 = const()[name = string("op_5033"), val = int32(-1)]; + tensor var_5051_axes_0 = const()[name = string("op_5051_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194268224)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194270848)))]; + fp16 var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5051_cast_fp16 = layer_norm(axes = var_5051_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5051_cast_fp16")]; + tensor var_5062_to_fp16 = const()[name = string("op_5062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194273472)))]; + tensor var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197550336)))]; + tensor linear_184_cast_fp16 = linear(bias = var_5063_to_fp16, weight = var_5062_to_fp16, x = var_5051_cast_fp16)[name = string("linear_184_cast_fp16")]; + tensor var_5066_to_fp16 = const()[name = string("op_5066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197552960)))]; + tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5066_to_fp16, x = var_5051_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor var_5070_to_fp16 = const()[name = string("op_5070_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200829824)))]; + tensor var_5071_to_fp16 = const()[name = string("op_5071_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204106688)))]; + tensor linear_186_cast_fp16 = linear(bias = var_5071_to_fp16, weight = var_5070_to_fp16, x = var_5051_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_5073_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5073_shape_cast_fp16")]; + int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)]; + int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)]; + bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)]; + string var_5073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)]; + tensor var_5073_shape_cast_fp16_to_uint16 = cast(dtype = var_5073_shape_cast_fp16_to_uint16_dtype_0, x = var_5073_shape_cast_fp16)[name = string("cast_344")]; + uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5073_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")]; + string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_343")]; + int32 end_step_49 = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step_49")]; + tensor expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor([0])]; + tensor expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor([0])]; + tensor expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor([0])]; + tensor expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step_49)[name = string("expand_dims_371")]; + tensor concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor([23])]; + int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; + bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; + tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")]; + tensor concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor([0])]; + tensor concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor([0])]; + tensor concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor([0])]; + int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; + bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; + tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")]; + tensor k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_108)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = k_cache1)[name = string("coreml_update_state_110")]; + tensor v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_109)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = v_cache1)[name = string("coreml_update_state_111")]; + int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)]; + int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1280)]; + int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)]; + bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)]; + tensor concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step_49, concat_516_values2_0))[name = string("concat_516")]; + tensor var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_516, end_mask = var_5089_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5089_cast_fp16")]; + tensor var_5092_begin_0 = const()[name = string("op_5092_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5092_end_mask_0 = const()[name = string("op_5092_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5092_cast_fp16 = slice_by_index(begin = var_5092_begin_0, end = concat_516, end_mask = var_5092_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5092_cast_fp16")]; + tensor concat_518x = const()[name = string("concat_518x"), val = tensor([1, -1, 20, 64])]; + tensor var_5102_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5102_cast_fp16")]; + tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_187_cast_fp16 = mul(x = var_5102_cast_fp16, y = const_252_to_fp16)[name = string("q_187_cast_fp16")]; + tensor concat_519x = const()[name = string("concat_519x"), val = tensor([1, -1, 20, 64])]; + tensor var_5109_cast_fp16 = reshape(shape = concat_519x, x = var_5089_cast_fp16)[name = string("op_5109_cast_fp16")]; + tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_235_cast_fp16 = mul(x = var_5109_cast_fp16, y = const_253_to_fp16)[name = string("k_235_cast_fp16")]; + tensor concat_520x = const()[name = string("concat_520x"), val = tensor([1, -1, 20, 64])]; + tensor var_5116_cast_fp16 = reshape(shape = concat_520x, x = var_5092_cast_fp16)[name = string("op_5116_cast_fp16")]; + tensor var_5117 = const()[name = string("op_5117"), val = tensor([0, 2, 1, 3])]; + bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)]; + bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)]; + tensor transpose_349_perm_0 = const()[name = string("transpose_349_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_350_perm_0 = const()[name = string("transpose_350_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_350 = transpose(perm = transpose_350_perm_0, x = k_235_cast_fp16)[name = string("transpose_454")]; + tensor transpose_349 = transpose(perm = transpose_349_perm_0, x = q_187_cast_fp16)[name = string("transpose_455")]; + tensor qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_349, y = transpose_350)[name = string("qk_139_cast_fp16")]; + int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)]; + int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)]; + bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)]; + tensor concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")]; + tensor var_5120_begin_0 = const()[name = string("op_5120_begin_0"), val = tensor([0, 0])]; + tensor var_5120_end_mask_0 = const()[name = string("op_5120_end_mask_0"), val = tensor([false, true])]; + tensor var_5120_cast_fp16 = slice_by_index(begin = var_5120_begin_0, end = concat_521, end_mask = var_5120_end_mask_0, x = mask_to_fp16)[name = string("op_5120_cast_fp16")]; + int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)]; + int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)]; + bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)]; + tensor concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")]; + tensor var_5121_begin_0 = const()[name = string("op_5121_begin_0"), val = tensor([0, 0])]; + tensor var_5121_end_mask_0 = const()[name = string("op_5121_end_mask_0"), val = tensor([true, false])]; + tensor var_5121_cast_fp16 = slice_by_index(begin = var_5121_begin_0, end = concat_522, end_mask = var_5121_end_mask_0, x = var_5120_cast_fp16)[name = string("op_5121_cast_fp16")]; + tensor qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5121_cast_fp16)[name = string("qk_141_cast_fp16")]; + tensor var_5124_cast_fp16 = softmax(axis = var_5033, x = qk_141_cast_fp16)[name = string("op_5124_cast_fp16")]; + bool var_5126_transpose_x_0 = const()[name = string("op_5126_transpose_x_0"), val = bool(false)]; + bool var_5126_transpose_y_0 = const()[name = string("op_5126_transpose_y_0"), val = bool(false)]; + tensor v_235_cast_fp16 = transpose(perm = var_5117, x = var_5116_cast_fp16)[name = string("transpose_456")]; + tensor var_5126_cast_fp16 = matmul(transpose_x = var_5126_transpose_x_0, transpose_y = var_5126_transpose_y_0, x = var_5124_cast_fp16, y = v_235_cast_fp16)[name = string("op_5126_cast_fp16")]; + tensor var_5127 = const()[name = string("op_5127"), val = tensor([0, 2, 1, 3])]; + tensor concat_523x = const()[name = string("concat_523x"), val = tensor([1, -1, 1280])]; + tensor var_5128_cast_fp16 = transpose(perm = var_5127, x = var_5126_cast_fp16)[name = string("transpose_453")]; + tensor x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5128_cast_fp16)[name = string("x_421_cast_fp16")]; + tensor var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204109312)))]; + tensor var_5133_to_fp16 = const()[name = string("op_5133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207386176)))]; + tensor linear_187_cast_fp16 = linear(bias = var_5133_to_fp16, weight = var_5132_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")]; + tensor var_5140_axes_0 = const()[name = string("op_5140_axes_0"), val = tensor([-1])]; + tensor blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207388800)))]; + tensor blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207391424)))]; + tensor var_5140_cast_fp16 = layer_norm(axes = var_5140_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5140_cast_fp16")]; + tensor var_5149_to_fp16 = const()[name = string("op_5149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207394048)))]; + tensor var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210670912)))]; + tensor linear_188_cast_fp16 = linear(bias = var_5150_to_fp16, weight = var_5149_to_fp16, x = var_5140_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor concat_524 = const()[name = string("concat_524"), val = tensor([0, 0, 0])]; + tensor concat_525 = const()[name = string("concat_525"), val = tensor([0, 1500, 0])]; + tensor k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_526 = const()[name = string("concat_526"), val = tensor([0, 0, 0])]; + tensor concat_527 = const()[name = string("concat_527"), val = tensor([0, 1500, 0])]; + tensor v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, -1, 20, 64])]; + tensor var_5170_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5170_cast_fp16")]; + tensor const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_191_cast_fp16 = mul(x = var_5170_cast_fp16, y = const_254_to_fp16)[name = string("q_191_cast_fp16")]; + tensor var_5176 = const()[name = string("op_5176"), val = tensor([1, 1500, 20, -1])]; + tensor var_5177_cast_fp16 = reshape(shape = var_5176, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5177_cast_fp16")]; + tensor const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_239_cast_fp16 = mul(x = var_5177_cast_fp16, y = const_255_to_fp16)[name = string("k_239_cast_fp16")]; + tensor var_5183 = const()[name = string("op_5183"), val = tensor([1, 1500, 20, -1])]; + tensor var_5184_cast_fp16 = reshape(shape = var_5183, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5184_cast_fp16")]; + tensor var_5185 = const()[name = string("op_5185"), val = tensor([0, 2, 1, 3])]; + bool qk_143_transpose_x_0 = const()[name = string("qk_143_transpose_x_0"), val = bool(false)]; + bool qk_143_transpose_y_0 = const()[name = string("qk_143_transpose_y_0"), val = bool(false)]; + tensor transpose_351_perm_0 = const()[name = string("transpose_351_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_352_perm_0 = const()[name = string("transpose_352_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_352 = transpose(perm = transpose_352_perm_0, x = k_239_cast_fp16)[name = string("transpose_450")]; + tensor transpose_351 = transpose(perm = transpose_351_perm_0, x = q_191_cast_fp16)[name = string("transpose_451")]; + tensor qk_143_cast_fp16 = matmul(transpose_x = qk_143_transpose_x_0, transpose_y = qk_143_transpose_y_0, x = transpose_351, y = transpose_352)[name = string("qk_143_cast_fp16")]; + tensor var_5189_cast_fp16 = softmax(axis = var_5033, x = qk_143_cast_fp16)[name = string("op_5189_cast_fp16")]; + bool var_5191_transpose_x_0 = const()[name = string("op_5191_transpose_x_0"), val = bool(false)]; + bool var_5191_transpose_y_0 = const()[name = string("op_5191_transpose_y_0"), val = bool(false)]; + tensor v_239_cast_fp16 = transpose(perm = var_5185, x = var_5184_cast_fp16)[name = string("transpose_452")]; + tensor var_5191_cast_fp16 = matmul(transpose_x = var_5191_transpose_x_0, transpose_y = var_5191_transpose_y_0, x = var_5189_cast_fp16, y = v_239_cast_fp16)[name = string("op_5191_cast_fp16")]; + tensor var_5192 = const()[name = string("op_5192"), val = tensor([0, 2, 1, 3])]; + tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, -1, 1280])]; + tensor var_5193_cast_fp16 = transpose(perm = var_5192, x = var_5191_cast_fp16)[name = string("transpose_449")]; + tensor x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5193_cast_fp16)[name = string("x_427_cast_fp16")]; + tensor var_5197_to_fp16 = const()[name = string("op_5197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210673536)))]; + tensor var_5198_to_fp16 = const()[name = string("op_5198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213950400)))]; + tensor linear_189_cast_fp16 = linear(bias = var_5198_to_fp16, weight = var_5197_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")]; + tensor var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213953024)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213955648)))]; + tensor var_5205_cast_fp16 = layer_norm(axes = var_5205_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5205_cast_fp16")]; + tensor var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213958272)))]; + tensor var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227065536)))]; + tensor linear_190_cast_fp16 = linear(bias = var_5215_to_fp16, weight = var_5214_to_fp16, x = var_5205_cast_fp16)[name = string("linear_190_cast_fp16")]; + string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")]; + tensor x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")]; + tensor var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227075840)))]; + tensor var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240183104)))]; + tensor linear_191_cast_fp16 = linear(bias = var_5221_to_fp16, weight = var_5220_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")]; + tensor k_cache_97_begin_0 = const()[name = string("k_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor k_cache_97_end_0 = const()[name = string("k_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; + tensor k_cache_97_end_mask_0 = const()[name = string("k_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_97_squeeze_mask_0 = const()[name = string("k_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_97_cast_fp16 = slice_by_index(begin = k_cache_97_begin_0, end = k_cache_97_end_0, end_mask = k_cache_97_end_mask_0, squeeze_mask = k_cache_97_squeeze_mask_0, x = coreml_update_state_110)[name = string("k_cache_97_cast_fp16")]; + tensor v_cache_97_begin_0 = const()[name = string("v_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor v_cache_97_end_0 = const()[name = string("v_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; + tensor v_cache_97_end_mask_0 = const()[name = string("v_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_97_squeeze_mask_0 = const()[name = string("v_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_97_cast_fp16 = slice_by_index(begin = v_cache_97_begin_0, end = v_cache_97_end_0, end_mask = v_cache_97_end_mask_0, squeeze_mask = v_cache_97_squeeze_mask_0, x = coreml_update_state_111)[name = string("v_cache_97_cast_fp16")]; + tensor k_cache_99_begin_0 = const()[name = string("k_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor k_cache_99_end_0 = const()[name = string("k_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; + tensor k_cache_99_end_mask_0 = const()[name = string("k_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_99_squeeze_mask_0 = const()[name = string("k_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_99_cast_fp16 = slice_by_index(begin = k_cache_99_begin_0, end = k_cache_99_end_0, end_mask = k_cache_99_end_mask_0, squeeze_mask = k_cache_99_squeeze_mask_0, x = read_state_2)[name = string("k_cache_99_cast_fp16")]; + tensor v_cache_99_begin_0 = const()[name = string("v_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor v_cache_99_end_0 = const()[name = string("v_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; + tensor v_cache_99_end_mask_0 = const()[name = string("v_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_99_squeeze_mask_0 = const()[name = string("v_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_99_cast_fp16 = slice_by_index(begin = v_cache_99_begin_0, end = v_cache_99_end_0, end_mask = v_cache_99_end_mask_0, squeeze_mask = v_cache_99_squeeze_mask_0, x = read_state_3)[name = string("v_cache_99_cast_fp16")]; + int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)]; + tensor var_5262_axes_0 = const()[name = string("op_5262_axes_0"), val = tensor([-1])]; + tensor blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240185728)))]; + tensor blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240188352)))]; + fp16 var_5250_to_fp16 = const()[name = string("op_5250_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5262_cast_fp16 = layer_norm(axes = var_5262_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5262_cast_fp16")]; + tensor var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240190976)))]; + tensor var_5274_to_fp16 = const()[name = string("op_5274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243467840)))]; + tensor linear_192_cast_fp16 = linear(bias = var_5274_to_fp16, weight = var_5273_to_fp16, x = var_5262_cast_fp16)[name = string("linear_192_cast_fp16")]; + tensor var_5277_to_fp16 = const()[name = string("op_5277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243470464)))]; + tensor linear_193_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5277_to_fp16, x = var_5262_cast_fp16)[name = string("linear_193_cast_fp16")]; + tensor var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246747328)))]; + tensor var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250024192)))]; + tensor linear_194_cast_fp16 = linear(bias = var_5282_to_fp16, weight = var_5281_to_fp16, x = var_5262_cast_fp16)[name = string("linear_194_cast_fp16")]; + tensor var_5284_shape_cast_fp16 = shape(x = linear_192_cast_fp16)[name = string("op_5284_shape_cast_fp16")]; + int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)]; + int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)]; + bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)]; + string var_5284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_290_to_uint16 = const()[name = string("select_290_to_uint16"), val = uint16(1)]; + tensor var_5284_shape_cast_fp16_to_uint16 = cast(dtype = var_5284_shape_cast_fp16_to_uint16_dtype_0, x = var_5284_shape_cast_fp16)[name = string("cast_342")]; + uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = select_290_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_5284_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")]; + string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_341")]; + int32 end_step_51 = add(x = offset, y = gather_290_cast_uint16_to_int32)[name = string("end_step_51")]; + tensor expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor([0])]; + tensor expand_dims_386 = const()[name = string("expand_dims_386"), val = tensor([0])]; + tensor expand_dims_387_axes_0 = const()[name = string("expand_dims_387_axes_0"), val = tensor([0])]; + tensor expand_dims_387 = expand_dims(axes = expand_dims_387_axes_0, x = end_step_51)[name = string("expand_dims_387")]; + tensor concat_532_values0_0 = const()[name = string("concat_532_values0_0"), val = tensor([24])]; + int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)]; + bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)]; + tensor concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (concat_532_values0_0, expand_dims_384, expand_dims_1, expand_dims_386))[name = string("concat_532")]; + tensor concat_533_values0_0 = const()[name = string("concat_533_values0_0"), val = tensor([0])]; + tensor concat_533_values1_0 = const()[name = string("concat_533_values1_0"), val = tensor([0])]; + tensor concat_533_values3_0 = const()[name = string("concat_533_values3_0"), val = tensor([0])]; + int32 concat_533_axis_0 = const()[name = string("concat_533_axis_0"), val = int32(0)]; + bool concat_533_interleave_0 = const()[name = string("concat_533_interleave_0"), val = bool(false)]; + tensor concat_533 = concat(axis = concat_533_axis_0, interleave = concat_533_interleave_0, values = (concat_533_values0_0, concat_533_values1_0, expand_dims_387, concat_533_values3_0))[name = string("concat_533")]; + tensor k_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = k_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = k_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_25_stride_0, update = linear_193_cast_fp16, x = coreml_update_state_110)[name = string("k_cache1_internal_tensor_assign_25_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_25_cast_fp16, input = k_cache1)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = k_cache1)[name = string("coreml_update_state_112")]; + tensor v_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = v_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = v_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_25_stride_0, update = linear_194_cast_fp16, x = coreml_update_state_111)[name = string("v_cache1_internal_tensor_assign_25_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_25_cast_fp16, input = v_cache1)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = v_cache1)[name = string("coreml_update_state_113")]; + int32 concat_538_values0_0 = const()[name = string("concat_538_values0_0"), val = int32(1)]; + int32 concat_538_values2_0 = const()[name = string("concat_538_values2_0"), val = int32(1280)]; + int32 concat_538_axis_0 = const()[name = string("concat_538_axis_0"), val = int32(0)]; + bool concat_538_interleave_0 = const()[name = string("concat_538_interleave_0"), val = bool(false)]; + tensor concat_538 = concat(axis = concat_538_axis_0, interleave = concat_538_interleave_0, values = (concat_538_values0_0, end_step_51, concat_538_values2_0))[name = string("concat_538")]; + tensor var_5300_begin_0 = const()[name = string("op_5300_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5300_end_mask_0 = const()[name = string("op_5300_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5300_cast_fp16 = slice_by_index(begin = var_5300_begin_0, end = concat_538, end_mask = var_5300_end_mask_0, x = k_cache_97_cast_fp16)[name = string("op_5300_cast_fp16")]; + tensor var_5303_begin_0 = const()[name = string("op_5303_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5303_end_mask_0 = const()[name = string("op_5303_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5303_cast_fp16 = slice_by_index(begin = var_5303_begin_0, end = concat_538, end_mask = var_5303_end_mask_0, x = v_cache_97_cast_fp16)[name = string("op_5303_cast_fp16")]; + tensor concat_540x = const()[name = string("concat_540x"), val = tensor([1, -1, 20, 64])]; + tensor var_5313_cast_fp16 = reshape(shape = concat_540x, x = linear_192_cast_fp16)[name = string("op_5313_cast_fp16")]; + tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_195_cast_fp16 = mul(x = var_5313_cast_fp16, y = const_256_to_fp16)[name = string("q_195_cast_fp16")]; + tensor concat_541x = const()[name = string("concat_541x"), val = tensor([1, -1, 20, 64])]; + tensor var_5320_cast_fp16 = reshape(shape = concat_541x, x = var_5300_cast_fp16)[name = string("op_5320_cast_fp16")]; + tensor const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_245_cast_fp16 = mul(x = var_5320_cast_fp16, y = const_257_to_fp16)[name = string("k_245_cast_fp16")]; + tensor concat_542x = const()[name = string("concat_542x"), val = tensor([1, -1, 20, 64])]; + tensor var_5327_cast_fp16 = reshape(shape = concat_542x, x = var_5303_cast_fp16)[name = string("op_5327_cast_fp16")]; + tensor var_5328 = const()[name = string("op_5328"), val = tensor([0, 2, 1, 3])]; + bool qk_145_transpose_x_0 = const()[name = string("qk_145_transpose_x_0"), val = bool(false)]; + bool qk_145_transpose_y_0 = const()[name = string("qk_145_transpose_y_0"), val = bool(false)]; + tensor transpose_353_perm_0 = const()[name = string("transpose_353_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_354_perm_0 = const()[name = string("transpose_354_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_354 = transpose(perm = transpose_354_perm_0, x = k_245_cast_fp16)[name = string("transpose_446")]; + tensor transpose_353 = transpose(perm = transpose_353_perm_0, x = q_195_cast_fp16)[name = string("transpose_447")]; + tensor qk_145_cast_fp16 = matmul(transpose_x = qk_145_transpose_x_0, transpose_y = qk_145_transpose_y_0, x = transpose_353, y = transpose_354)[name = string("qk_145_cast_fp16")]; + int32 concat_543_values1_0 = const()[name = string("concat_543_values1_0"), val = int32(448)]; + int32 concat_543_axis_0 = const()[name = string("concat_543_axis_0"), val = int32(0)]; + bool concat_543_interleave_0 = const()[name = string("concat_543_interleave_0"), val = bool(false)]; + tensor concat_543 = concat(axis = concat_543_axis_0, interleave = concat_543_interleave_0, values = (gather_290_cast_uint16_to_int32, concat_543_values1_0))[name = string("concat_543")]; + tensor var_5331_begin_0 = const()[name = string("op_5331_begin_0"), val = tensor([0, 0])]; + tensor var_5331_end_mask_0 = const()[name = string("op_5331_end_mask_0"), val = tensor([false, true])]; + tensor var_5331_cast_fp16 = slice_by_index(begin = var_5331_begin_0, end = concat_543, end_mask = var_5331_end_mask_0, x = mask_to_fp16)[name = string("op_5331_cast_fp16")]; + int32 concat_544_values0_0 = const()[name = string("concat_544_values0_0"), val = int32(0)]; + int32 concat_544_axis_0 = const()[name = string("concat_544_axis_0"), val = int32(0)]; + bool concat_544_interleave_0 = const()[name = string("concat_544_interleave_0"), val = bool(false)]; + tensor concat_544 = concat(axis = concat_544_axis_0, interleave = concat_544_interleave_0, values = (concat_544_values0_0, gather_290_cast_uint16_to_int32))[name = string("concat_544")]; + tensor var_5332_begin_0 = const()[name = string("op_5332_begin_0"), val = tensor([0, 0])]; + tensor var_5332_end_mask_0 = const()[name = string("op_5332_end_mask_0"), val = tensor([true, false])]; + tensor var_5332_cast_fp16 = slice_by_index(begin = var_5332_begin_0, end = concat_544, end_mask = var_5332_end_mask_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")]; + tensor qk_147_cast_fp16 = add(x = qk_145_cast_fp16, y = var_5332_cast_fp16)[name = string("qk_147_cast_fp16")]; + tensor var_5335_cast_fp16 = softmax(axis = var_5244, x = qk_147_cast_fp16)[name = string("op_5335_cast_fp16")]; + bool var_5337_transpose_x_0 = const()[name = string("op_5337_transpose_x_0"), val = bool(false)]; + bool var_5337_transpose_y_0 = const()[name = string("op_5337_transpose_y_0"), val = bool(false)]; + tensor v_245_cast_fp16 = transpose(perm = var_5328, x = var_5327_cast_fp16)[name = string("transpose_448")]; + tensor var_5337_cast_fp16 = matmul(transpose_x = var_5337_transpose_x_0, transpose_y = var_5337_transpose_y_0, x = var_5335_cast_fp16, y = v_245_cast_fp16)[name = string("op_5337_cast_fp16")]; + tensor var_5338 = const()[name = string("op_5338"), val = tensor([0, 2, 1, 3])]; + tensor concat_545x = const()[name = string("concat_545x"), val = tensor([1, -1, 1280])]; + tensor var_5339_cast_fp16 = transpose(perm = var_5338, x = var_5337_cast_fp16)[name = string("transpose_445")]; + tensor x_439_cast_fp16 = reshape(shape = concat_545x, x = var_5339_cast_fp16)[name = string("x_439_cast_fp16")]; + tensor var_5343_to_fp16 = const()[name = string("op_5343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250026816)))]; + tensor var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253303680)))]; + tensor linear_195_cast_fp16 = linear(bias = var_5344_to_fp16, weight = var_5343_to_fp16, x = x_439_cast_fp16)[name = string("linear_195_cast_fp16")]; + tensor x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_195_cast_fp16)[name = string("x_441_cast_fp16")]; + tensor var_5351_axes_0 = const()[name = string("op_5351_axes_0"), val = tensor([-1])]; + tensor blocks_24_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253306304)))]; + tensor blocks_24_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253308928)))]; + tensor var_5351_cast_fp16 = layer_norm(axes = var_5351_axes_0, beta = blocks_24_cross_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_cross_attn_ln_weight_to_fp16, x = x_441_cast_fp16)[name = string("op_5351_cast_fp16")]; + tensor var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253311552)))]; + tensor var_5361_to_fp16 = const()[name = string("op_5361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256588416)))]; + tensor linear_196_cast_fp16 = linear(bias = var_5361_to_fp16, weight = var_5360_to_fp16, x = var_5351_cast_fp16)[name = string("linear_196_cast_fp16")]; + tensor concat_546 = const()[name = string("concat_546"), val = tensor([0, 0, 0])]; + tensor concat_547 = const()[name = string("concat_547"), val = tensor([0, 1500, 0])]; + tensor k_247_internal_tensor_assign_1_stride_0 = const()[name = string("k_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_546, begin_mask = k_247_internal_tensor_assign_1_begin_mask_0, end = concat_547, end_mask = k_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_247_internal_tensor_assign_1_squeeze_mask_0, stride = k_247_internal_tensor_assign_1_stride_0, update = k_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("k_247_internal_tensor_assign_1_cast_fp16")]; + tensor concat_548 = const()[name = string("concat_548"), val = tensor([0, 0, 0])]; + tensor concat_549 = const()[name = string("concat_549"), val = tensor([0, 1500, 0])]; + tensor v_247_internal_tensor_assign_1_stride_0 = const()[name = string("v_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_548, begin_mask = v_247_internal_tensor_assign_1_begin_mask_0, end = concat_549, end_mask = v_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_247_internal_tensor_assign_1_squeeze_mask_0, stride = v_247_internal_tensor_assign_1_stride_0, update = v_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("v_247_internal_tensor_assign_1_cast_fp16")]; + tensor concat_550x = const()[name = string("concat_550x"), val = tensor([1, -1, 20, 64])]; + tensor var_5381_cast_fp16 = reshape(shape = concat_550x, x = linear_196_cast_fp16)[name = string("op_5381_cast_fp16")]; + tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_199_cast_fp16 = mul(x = var_5381_cast_fp16, y = const_258_to_fp16)[name = string("q_199_cast_fp16")]; + tensor var_5387 = const()[name = string("op_5387"), val = tensor([1, 1500, 20, -1])]; + tensor var_5388_cast_fp16 = reshape(shape = var_5387, x = k_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5388_cast_fp16")]; + tensor const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_249_cast_fp16 = mul(x = var_5388_cast_fp16, y = const_259_to_fp16)[name = string("k_249_cast_fp16")]; + tensor var_5394 = const()[name = string("op_5394"), val = tensor([1, 1500, 20, -1])]; + tensor var_5395_cast_fp16 = reshape(shape = var_5394, x = v_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5395_cast_fp16")]; + tensor var_5396 = const()[name = string("op_5396"), val = tensor([0, 2, 1, 3])]; + bool qk_149_transpose_x_0 = const()[name = string("qk_149_transpose_x_0"), val = bool(false)]; + bool qk_149_transpose_y_0 = const()[name = string("qk_149_transpose_y_0"), val = bool(false)]; + tensor transpose_355_perm_0 = const()[name = string("transpose_355_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_356_perm_0 = const()[name = string("transpose_356_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_356 = transpose(perm = transpose_356_perm_0, x = k_249_cast_fp16)[name = string("transpose_442")]; + tensor transpose_355 = transpose(perm = transpose_355_perm_0, x = q_199_cast_fp16)[name = string("transpose_443")]; + tensor qk_149_cast_fp16 = matmul(transpose_x = qk_149_transpose_x_0, transpose_y = qk_149_transpose_y_0, x = transpose_355, y = transpose_356)[name = string("qk_149_cast_fp16")]; + tensor var_5400_cast_fp16 = softmax(axis = var_5244, x = qk_149_cast_fp16)[name = string("op_5400_cast_fp16")]; + bool var_5402_transpose_x_0 = const()[name = string("op_5402_transpose_x_0"), val = bool(false)]; + bool var_5402_transpose_y_0 = const()[name = string("op_5402_transpose_y_0"), val = bool(false)]; + tensor v_249_cast_fp16 = transpose(perm = var_5396, x = var_5395_cast_fp16)[name = string("transpose_444")]; + tensor var_5402_cast_fp16 = matmul(transpose_x = var_5402_transpose_x_0, transpose_y = var_5402_transpose_y_0, x = var_5400_cast_fp16, y = v_249_cast_fp16)[name = string("op_5402_cast_fp16")]; + tensor var_5403 = const()[name = string("op_5403"), val = tensor([0, 2, 1, 3])]; + tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 1280])]; + tensor var_5404_cast_fp16 = transpose(perm = var_5403, x = var_5402_cast_fp16)[name = string("transpose_441")]; + tensor x_445_cast_fp16 = reshape(shape = concat_551x, x = var_5404_cast_fp16)[name = string("x_445_cast_fp16")]; + tensor var_5408_to_fp16 = const()[name = string("op_5408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256591040)))]; + tensor var_5409_to_fp16 = const()[name = string("op_5409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259867904)))]; + tensor linear_197_cast_fp16 = linear(bias = var_5409_to_fp16, weight = var_5408_to_fp16, x = x_445_cast_fp16)[name = string("linear_197_cast_fp16")]; + tensor x_447_cast_fp16 = add(x = x_441_cast_fp16, y = linear_197_cast_fp16)[name = string("x_447_cast_fp16")]; + tensor var_5416_axes_0 = const()[name = string("op_5416_axes_0"), val = tensor([-1])]; + tensor blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259870528)))]; + tensor blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259873152)))]; + tensor var_5416_cast_fp16 = layer_norm(axes = var_5416_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_447_cast_fp16)[name = string("op_5416_cast_fp16")]; + tensor var_5425_to_fp16 = const()[name = string("op_5425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259875776)))]; + tensor var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272983040)))]; + tensor linear_198_cast_fp16 = linear(bias = var_5426_to_fp16, weight = var_5425_to_fp16, x = var_5416_cast_fp16)[name = string("linear_198_cast_fp16")]; + string x_451_mode_0 = const()[name = string("x_451_mode_0"), val = string("EXACT")]; + tensor x_451_cast_fp16 = gelu(mode = x_451_mode_0, x = linear_198_cast_fp16)[name = string("x_451_cast_fp16")]; + tensor var_5431_to_fp16 = const()[name = string("op_5431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272993344)))]; + tensor var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286100608)))]; + tensor linear_199_cast_fp16 = linear(bias = var_5432_to_fp16, weight = var_5431_to_fp16, x = x_451_cast_fp16)[name = string("linear_199_cast_fp16")]; + tensor x_453_cast_fp16 = add(x = x_447_cast_fp16, y = linear_199_cast_fp16)[name = string("x_453_cast_fp16")]; + tensor k_cache_101_begin_0 = const()[name = string("k_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor k_cache_101_end_0 = const()[name = string("k_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; + tensor k_cache_101_end_mask_0 = const()[name = string("k_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_101_squeeze_mask_0 = const()[name = string("k_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_101_cast_fp16 = slice_by_index(begin = k_cache_101_begin_0, end = k_cache_101_end_0, end_mask = k_cache_101_end_mask_0, squeeze_mask = k_cache_101_squeeze_mask_0, x = coreml_update_state_112)[name = string("k_cache_101_cast_fp16")]; + tensor v_cache_101_begin_0 = const()[name = string("v_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor v_cache_101_end_0 = const()[name = string("v_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; + tensor v_cache_101_end_mask_0 = const()[name = string("v_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_101_squeeze_mask_0 = const()[name = string("v_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_101_cast_fp16 = slice_by_index(begin = v_cache_101_begin_0, end = v_cache_101_end_0, end_mask = v_cache_101_end_mask_0, squeeze_mask = v_cache_101_squeeze_mask_0, x = coreml_update_state_113)[name = string("v_cache_101_cast_fp16")]; + tensor k_cache_103_begin_0 = const()[name = string("k_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor k_cache_103_end_0 = const()[name = string("k_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; + tensor k_cache_103_end_mask_0 = const()[name = string("k_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_103_squeeze_mask_0 = const()[name = string("k_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_103_cast_fp16 = slice_by_index(begin = k_cache_103_begin_0, end = k_cache_103_end_0, end_mask = k_cache_103_end_mask_0, squeeze_mask = k_cache_103_squeeze_mask_0, x = read_state_2)[name = string("k_cache_103_cast_fp16")]; + tensor v_cache_103_begin_0 = const()[name = string("v_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor v_cache_103_end_0 = const()[name = string("v_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; + tensor v_cache_103_end_mask_0 = const()[name = string("v_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_103_squeeze_mask_0 = const()[name = string("v_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_103_cast_fp16 = slice_by_index(begin = v_cache_103_begin_0, end = v_cache_103_end_0, end_mask = v_cache_103_end_mask_0, squeeze_mask = v_cache_103_squeeze_mask_0, x = read_state_3)[name = string("v_cache_103_cast_fp16")]; + int32 var_5455 = const()[name = string("op_5455"), val = int32(-1)]; + tensor var_5473_axes_0 = const()[name = string("op_5473_axes_0"), val = tensor([-1])]; + tensor blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286103232)))]; + tensor blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286105856)))]; + fp16 var_5461_to_fp16 = const()[name = string("op_5461_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5473_cast_fp16 = layer_norm(axes = var_5473_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_453_cast_fp16)[name = string("op_5473_cast_fp16")]; + tensor var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286108480)))]; + tensor var_5485_to_fp16 = const()[name = string("op_5485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289385344)))]; + tensor linear_200_cast_fp16 = linear(bias = var_5485_to_fp16, weight = var_5484_to_fp16, x = var_5473_cast_fp16)[name = string("linear_200_cast_fp16")]; + tensor var_5488_to_fp16 = const()[name = string("op_5488_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289387968)))]; + tensor linear_201_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5488_to_fp16, x = var_5473_cast_fp16)[name = string("linear_201_cast_fp16")]; + tensor var_5492_to_fp16 = const()[name = string("op_5492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1292664832)))]; + tensor var_5493_to_fp16 = const()[name = string("op_5493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295941696)))]; + tensor linear_202_cast_fp16 = linear(bias = var_5493_to_fp16, weight = var_5492_to_fp16, x = var_5473_cast_fp16)[name = string("linear_202_cast_fp16")]; + tensor var_5495_shape_cast_fp16 = shape(x = linear_200_cast_fp16)[name = string("op_5495_shape_cast_fp16")]; + int32 gather_302_axis_0 = const()[name = string("gather_302_axis_0"), val = int32(0)]; + int32 gather_302_batch_dims_0 = const()[name = string("gather_302_batch_dims_0"), val = int32(0)]; + bool gather_302_validate_indices_0 = const()[name = string("gather_302_validate_indices_0"), val = bool(false)]; + string var_5495_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5495_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_302_to_uint16 = const()[name = string("select_302_to_uint16"), val = uint16(1)]; + tensor var_5495_shape_cast_fp16_to_uint16 = cast(dtype = var_5495_shape_cast_fp16_to_uint16_dtype_0, x = var_5495_shape_cast_fp16)[name = string("cast_340")]; + uint16 gather_302_cast_uint16 = gather(axis = gather_302_axis_0, batch_dims = gather_302_batch_dims_0, indices = select_302_to_uint16, validate_indices = gather_302_validate_indices_0, x = var_5495_shape_cast_fp16_to_uint16)[name = string("gather_302_cast_uint16")]; + string gather_302_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_302_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_302_cast_uint16_to_int32 = cast(dtype = gather_302_cast_uint16_to_int32_dtype_0, x = gather_302_cast_uint16)[name = string("cast_339")]; + int32 end_step_53 = add(x = offset, y = gather_302_cast_uint16_to_int32)[name = string("end_step_53")]; + tensor expand_dims_400 = const()[name = string("expand_dims_400"), val = tensor([0])]; + tensor expand_dims_402 = const()[name = string("expand_dims_402"), val = tensor([0])]; + tensor expand_dims_403_axes_0 = const()[name = string("expand_dims_403_axes_0"), val = tensor([0])]; + tensor expand_dims_403 = expand_dims(axes = expand_dims_403_axes_0, x = end_step_53)[name = string("expand_dims_403")]; + tensor concat_554_values0_0 = const()[name = string("concat_554_values0_0"), val = tensor([25])]; + int32 concat_554_axis_0 = const()[name = string("concat_554_axis_0"), val = int32(0)]; + bool concat_554_interleave_0 = const()[name = string("concat_554_interleave_0"), val = bool(false)]; + tensor concat_554 = concat(axis = concat_554_axis_0, interleave = concat_554_interleave_0, values = (concat_554_values0_0, expand_dims_400, expand_dims_1, expand_dims_402))[name = string("concat_554")]; + tensor concat_555_values0_0 = const()[name = string("concat_555_values0_0"), val = tensor([0])]; + tensor concat_555_values1_0 = const()[name = string("concat_555_values1_0"), val = tensor([0])]; + tensor concat_555_values3_0 = const()[name = string("concat_555_values3_0"), val = tensor([0])]; + int32 concat_555_axis_0 = const()[name = string("concat_555_axis_0"), val = int32(0)]; + bool concat_555_interleave_0 = const()[name = string("concat_555_interleave_0"), val = bool(false)]; + tensor concat_555 = concat(axis = concat_555_axis_0, interleave = concat_555_interleave_0, values = (concat_555_values0_0, concat_555_values1_0, expand_dims_403, concat_555_values3_0))[name = string("concat_555")]; + tensor k_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = k_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = k_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_26_stride_0, update = linear_201_cast_fp16, x = coreml_update_state_112)[name = string("k_cache1_internal_tensor_assign_26_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_26_cast_fp16, input = k_cache1)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = k_cache1)[name = string("coreml_update_state_114")]; + tensor v_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = v_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = v_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_26_stride_0, update = linear_202_cast_fp16, x = coreml_update_state_113)[name = string("v_cache1_internal_tensor_assign_26_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_26_cast_fp16, input = v_cache1)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = v_cache1)[name = string("coreml_update_state_115")]; + int32 concat_560_values0_0 = const()[name = string("concat_560_values0_0"), val = int32(1)]; + int32 concat_560_values2_0 = const()[name = string("concat_560_values2_0"), val = int32(1280)]; + int32 concat_560_axis_0 = const()[name = string("concat_560_axis_0"), val = int32(0)]; + bool concat_560_interleave_0 = const()[name = string("concat_560_interleave_0"), val = bool(false)]; + tensor concat_560 = concat(axis = concat_560_axis_0, interleave = concat_560_interleave_0, values = (concat_560_values0_0, end_step_53, concat_560_values2_0))[name = string("concat_560")]; + tensor var_5511_begin_0 = const()[name = string("op_5511_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5511_end_mask_0 = const()[name = string("op_5511_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5511_cast_fp16 = slice_by_index(begin = var_5511_begin_0, end = concat_560, end_mask = var_5511_end_mask_0, x = k_cache_101_cast_fp16)[name = string("op_5511_cast_fp16")]; + tensor var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = concat_560, end_mask = var_5514_end_mask_0, x = v_cache_101_cast_fp16)[name = string("op_5514_cast_fp16")]; + tensor concat_562x = const()[name = string("concat_562x"), val = tensor([1, -1, 20, 64])]; + tensor var_5524_cast_fp16 = reshape(shape = concat_562x, x = linear_200_cast_fp16)[name = string("op_5524_cast_fp16")]; + tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_203_cast_fp16 = mul(x = var_5524_cast_fp16, y = const_260_to_fp16)[name = string("q_203_cast_fp16")]; + tensor concat_563x = const()[name = string("concat_563x"), val = tensor([1, -1, 20, 64])]; + tensor var_5531_cast_fp16 = reshape(shape = concat_563x, x = var_5511_cast_fp16)[name = string("op_5531_cast_fp16")]; + tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_255_cast_fp16 = mul(x = var_5531_cast_fp16, y = const_261_to_fp16)[name = string("k_255_cast_fp16")]; + tensor concat_564x = const()[name = string("concat_564x"), val = tensor([1, -1, 20, 64])]; + tensor var_5538_cast_fp16 = reshape(shape = concat_564x, x = var_5514_cast_fp16)[name = string("op_5538_cast_fp16")]; + tensor var_5539 = const()[name = string("op_5539"), val = tensor([0, 2, 1, 3])]; + bool qk_151_transpose_x_0 = const()[name = string("qk_151_transpose_x_0"), val = bool(false)]; + bool qk_151_transpose_y_0 = const()[name = string("qk_151_transpose_y_0"), val = bool(false)]; + tensor transpose_357_perm_0 = const()[name = string("transpose_357_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_358_perm_0 = const()[name = string("transpose_358_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_358 = transpose(perm = transpose_358_perm_0, x = k_255_cast_fp16)[name = string("transpose_438")]; + tensor transpose_357 = transpose(perm = transpose_357_perm_0, x = q_203_cast_fp16)[name = string("transpose_439")]; + tensor qk_151_cast_fp16 = matmul(transpose_x = qk_151_transpose_x_0, transpose_y = qk_151_transpose_y_0, x = transpose_357, y = transpose_358)[name = string("qk_151_cast_fp16")]; + int32 concat_565_values1_0 = const()[name = string("concat_565_values1_0"), val = int32(448)]; + int32 concat_565_axis_0 = const()[name = string("concat_565_axis_0"), val = int32(0)]; + bool concat_565_interleave_0 = const()[name = string("concat_565_interleave_0"), val = bool(false)]; + tensor concat_565 = concat(axis = concat_565_axis_0, interleave = concat_565_interleave_0, values = (gather_302_cast_uint16_to_int32, concat_565_values1_0))[name = string("concat_565")]; + tensor var_5542_begin_0 = const()[name = string("op_5542_begin_0"), val = tensor([0, 0])]; + tensor var_5542_end_mask_0 = const()[name = string("op_5542_end_mask_0"), val = tensor([false, true])]; + tensor var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = concat_565, end_mask = var_5542_end_mask_0, x = mask_to_fp16)[name = string("op_5542_cast_fp16")]; + int32 concat_566_values0_0 = const()[name = string("concat_566_values0_0"), val = int32(0)]; + int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; + bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; + tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (concat_566_values0_0, gather_302_cast_uint16_to_int32))[name = string("concat_566")]; + tensor var_5543_begin_0 = const()[name = string("op_5543_begin_0"), val = tensor([0, 0])]; + tensor var_5543_end_mask_0 = const()[name = string("op_5543_end_mask_0"), val = tensor([true, false])]; + tensor var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = concat_566, end_mask = var_5543_end_mask_0, x = var_5542_cast_fp16)[name = string("op_5543_cast_fp16")]; + tensor qk_153_cast_fp16 = add(x = qk_151_cast_fp16, y = var_5543_cast_fp16)[name = string("qk_153_cast_fp16")]; + tensor var_5546_cast_fp16 = softmax(axis = var_5455, x = qk_153_cast_fp16)[name = string("op_5546_cast_fp16")]; + bool var_5548_transpose_x_0 = const()[name = string("op_5548_transpose_x_0"), val = bool(false)]; + bool var_5548_transpose_y_0 = const()[name = string("op_5548_transpose_y_0"), val = bool(false)]; + tensor v_255_cast_fp16 = transpose(perm = var_5539, x = var_5538_cast_fp16)[name = string("transpose_440")]; + tensor var_5548_cast_fp16 = matmul(transpose_x = var_5548_transpose_x_0, transpose_y = var_5548_transpose_y_0, x = var_5546_cast_fp16, y = v_255_cast_fp16)[name = string("op_5548_cast_fp16")]; + tensor var_5549 = const()[name = string("op_5549"), val = tensor([0, 2, 1, 3])]; + tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, -1, 1280])]; + tensor var_5550_cast_fp16 = transpose(perm = var_5549, x = var_5548_cast_fp16)[name = string("transpose_437")]; + tensor x_457_cast_fp16 = reshape(shape = concat_567x, x = var_5550_cast_fp16)[name = string("x_457_cast_fp16")]; + tensor var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295944320)))]; + tensor var_5555_to_fp16 = const()[name = string("op_5555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299221184)))]; + tensor linear_203_cast_fp16 = linear(bias = var_5555_to_fp16, weight = var_5554_to_fp16, x = x_457_cast_fp16)[name = string("linear_203_cast_fp16")]; + tensor x_459_cast_fp16 = add(x = x_453_cast_fp16, y = linear_203_cast_fp16)[name = string("x_459_cast_fp16")]; + tensor var_5562_axes_0 = const()[name = string("op_5562_axes_0"), val = tensor([-1])]; + tensor blocks_25_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299223808)))]; + tensor blocks_25_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299226432)))]; + tensor var_5562_cast_fp16 = layer_norm(axes = var_5562_axes_0, beta = blocks_25_cross_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_cross_attn_ln_weight_to_fp16, x = x_459_cast_fp16)[name = string("op_5562_cast_fp16")]; + tensor var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299229056)))]; + tensor var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302505920)))]; + tensor linear_204_cast_fp16 = linear(bias = var_5572_to_fp16, weight = var_5571_to_fp16, x = var_5562_cast_fp16)[name = string("linear_204_cast_fp16")]; + tensor concat_568 = const()[name = string("concat_568"), val = tensor([0, 0, 0])]; + tensor concat_569 = const()[name = string("concat_569"), val = tensor([0, 1500, 0])]; + tensor k_257_internal_tensor_assign_1_stride_0 = const()[name = string("k_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_568, begin_mask = k_257_internal_tensor_assign_1_begin_mask_0, end = concat_569, end_mask = k_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_257_internal_tensor_assign_1_squeeze_mask_0, stride = k_257_internal_tensor_assign_1_stride_0, update = k_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("k_257_internal_tensor_assign_1_cast_fp16")]; + tensor concat_570 = const()[name = string("concat_570"), val = tensor([0, 0, 0])]; + tensor concat_571 = const()[name = string("concat_571"), val = tensor([0, 1500, 0])]; + tensor v_257_internal_tensor_assign_1_stride_0 = const()[name = string("v_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_570, begin_mask = v_257_internal_tensor_assign_1_begin_mask_0, end = concat_571, end_mask = v_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_257_internal_tensor_assign_1_squeeze_mask_0, stride = v_257_internal_tensor_assign_1_stride_0, update = v_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("v_257_internal_tensor_assign_1_cast_fp16")]; + tensor concat_572x = const()[name = string("concat_572x"), val = tensor([1, -1, 20, 64])]; + tensor var_5592_cast_fp16 = reshape(shape = concat_572x, x = linear_204_cast_fp16)[name = string("op_5592_cast_fp16")]; + tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_207_cast_fp16 = mul(x = var_5592_cast_fp16, y = const_262_to_fp16)[name = string("q_207_cast_fp16")]; + tensor var_5598 = const()[name = string("op_5598"), val = tensor([1, 1500, 20, -1])]; + tensor var_5599_cast_fp16 = reshape(shape = var_5598, x = k_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5599_cast_fp16")]; + tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_259_cast_fp16 = mul(x = var_5599_cast_fp16, y = const_263_to_fp16)[name = string("k_259_cast_fp16")]; + tensor var_5605 = const()[name = string("op_5605"), val = tensor([1, 1500, 20, -1])]; + tensor var_5606_cast_fp16 = reshape(shape = var_5605, x = v_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5606_cast_fp16")]; + tensor var_5607 = const()[name = string("op_5607"), val = tensor([0, 2, 1, 3])]; + bool qk_155_transpose_x_0 = const()[name = string("qk_155_transpose_x_0"), val = bool(false)]; + bool qk_155_transpose_y_0 = const()[name = string("qk_155_transpose_y_0"), val = bool(false)]; + tensor transpose_359_perm_0 = const()[name = string("transpose_359_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_360_perm_0 = const()[name = string("transpose_360_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_360 = transpose(perm = transpose_360_perm_0, x = k_259_cast_fp16)[name = string("transpose_434")]; + tensor transpose_359 = transpose(perm = transpose_359_perm_0, x = q_207_cast_fp16)[name = string("transpose_435")]; + tensor qk_155_cast_fp16 = matmul(transpose_x = qk_155_transpose_x_0, transpose_y = qk_155_transpose_y_0, x = transpose_359, y = transpose_360)[name = string("qk_155_cast_fp16")]; + tensor var_5611_cast_fp16 = softmax(axis = var_5455, x = qk_155_cast_fp16)[name = string("op_5611_cast_fp16")]; + bool var_5613_transpose_x_0 = const()[name = string("op_5613_transpose_x_0"), val = bool(false)]; + bool var_5613_transpose_y_0 = const()[name = string("op_5613_transpose_y_0"), val = bool(false)]; + tensor v_259_cast_fp16 = transpose(perm = var_5607, x = var_5606_cast_fp16)[name = string("transpose_436")]; + tensor var_5613_cast_fp16 = matmul(transpose_x = var_5613_transpose_x_0, transpose_y = var_5613_transpose_y_0, x = var_5611_cast_fp16, y = v_259_cast_fp16)[name = string("op_5613_cast_fp16")]; + tensor var_5614 = const()[name = string("op_5614"), val = tensor([0, 2, 1, 3])]; + tensor concat_573x = const()[name = string("concat_573x"), val = tensor([1, -1, 1280])]; + tensor var_5615_cast_fp16 = transpose(perm = var_5614, x = var_5613_cast_fp16)[name = string("transpose_433")]; + tensor x_463_cast_fp16 = reshape(shape = concat_573x, x = var_5615_cast_fp16)[name = string("x_463_cast_fp16")]; + tensor var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302508544)))]; + tensor var_5620_to_fp16 = const()[name = string("op_5620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305785408)))]; + tensor linear_205_cast_fp16 = linear(bias = var_5620_to_fp16, weight = var_5619_to_fp16, x = x_463_cast_fp16)[name = string("linear_205_cast_fp16")]; + tensor x_465_cast_fp16 = add(x = x_459_cast_fp16, y = linear_205_cast_fp16)[name = string("x_465_cast_fp16")]; + tensor var_5627_axes_0 = const()[name = string("op_5627_axes_0"), val = tensor([-1])]; + tensor blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305788032)))]; + tensor blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305790656)))]; + tensor var_5627_cast_fp16 = layer_norm(axes = var_5627_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_465_cast_fp16)[name = string("op_5627_cast_fp16")]; + tensor var_5636_to_fp16 = const()[name = string("op_5636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305793280)))]; + tensor var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318900544)))]; + tensor linear_206_cast_fp16 = linear(bias = var_5637_to_fp16, weight = var_5636_to_fp16, x = var_5627_cast_fp16)[name = string("linear_206_cast_fp16")]; + string x_469_mode_0 = const()[name = string("x_469_mode_0"), val = string("EXACT")]; + tensor x_469_cast_fp16 = gelu(mode = x_469_mode_0, x = linear_206_cast_fp16)[name = string("x_469_cast_fp16")]; + tensor var_5642_to_fp16 = const()[name = string("op_5642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318910848)))]; + tensor var_5643_to_fp16 = const()[name = string("op_5643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332018112)))]; + tensor linear_207_cast_fp16 = linear(bias = var_5643_to_fp16, weight = var_5642_to_fp16, x = x_469_cast_fp16)[name = string("linear_207_cast_fp16")]; + tensor x_471_cast_fp16 = add(x = x_465_cast_fp16, y = linear_207_cast_fp16)[name = string("x_471_cast_fp16")]; + tensor k_cache_105_begin_0 = const()[name = string("k_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor k_cache_105_end_0 = const()[name = string("k_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; + tensor k_cache_105_end_mask_0 = const()[name = string("k_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_105_squeeze_mask_0 = const()[name = string("k_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_105_cast_fp16 = slice_by_index(begin = k_cache_105_begin_0, end = k_cache_105_end_0, end_mask = k_cache_105_end_mask_0, squeeze_mask = k_cache_105_squeeze_mask_0, x = coreml_update_state_114)[name = string("k_cache_105_cast_fp16")]; + tensor v_cache_105_begin_0 = const()[name = string("v_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor v_cache_105_end_0 = const()[name = string("v_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; + tensor v_cache_105_end_mask_0 = const()[name = string("v_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_105_squeeze_mask_0 = const()[name = string("v_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_105_cast_fp16 = slice_by_index(begin = v_cache_105_begin_0, end = v_cache_105_end_0, end_mask = v_cache_105_end_mask_0, squeeze_mask = v_cache_105_squeeze_mask_0, x = coreml_update_state_115)[name = string("v_cache_105_cast_fp16")]; + tensor k_cache_107_begin_0 = const()[name = string("k_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor k_cache_107_end_0 = const()[name = string("k_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; + tensor k_cache_107_end_mask_0 = const()[name = string("k_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_107_squeeze_mask_0 = const()[name = string("k_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_107_cast_fp16 = slice_by_index(begin = k_cache_107_begin_0, end = k_cache_107_end_0, end_mask = k_cache_107_end_mask_0, squeeze_mask = k_cache_107_squeeze_mask_0, x = read_state_2)[name = string("k_cache_107_cast_fp16")]; + tensor v_cache_107_begin_0 = const()[name = string("v_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor v_cache_107_end_0 = const()[name = string("v_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; + tensor v_cache_107_end_mask_0 = const()[name = string("v_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_107_squeeze_mask_0 = const()[name = string("v_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_107_cast_fp16 = slice_by_index(begin = v_cache_107_begin_0, end = v_cache_107_end_0, end_mask = v_cache_107_end_mask_0, squeeze_mask = v_cache_107_squeeze_mask_0, x = read_state_3)[name = string("v_cache_107_cast_fp16")]; + int32 var_5666 = const()[name = string("op_5666"), val = int32(-1)]; + tensor var_5684_axes_0 = const()[name = string("op_5684_axes_0"), val = tensor([-1])]; + tensor blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332020736)))]; + tensor blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332023360)))]; + fp16 var_5672_to_fp16 = const()[name = string("op_5672_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5684_cast_fp16 = layer_norm(axes = var_5684_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_471_cast_fp16)[name = string("op_5684_cast_fp16")]; + tensor var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332025984)))]; + tensor var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335302848)))]; + tensor linear_208_cast_fp16 = linear(bias = var_5696_to_fp16, weight = var_5695_to_fp16, x = var_5684_cast_fp16)[name = string("linear_208_cast_fp16")]; + tensor var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335305472)))]; + tensor linear_209_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5699_to_fp16, x = var_5684_cast_fp16)[name = string("linear_209_cast_fp16")]; + tensor var_5703_to_fp16 = const()[name = string("op_5703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1338582336)))]; + tensor var_5704_to_fp16 = const()[name = string("op_5704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341859200)))]; + tensor linear_210_cast_fp16 = linear(bias = var_5704_to_fp16, weight = var_5703_to_fp16, x = var_5684_cast_fp16)[name = string("linear_210_cast_fp16")]; + tensor var_5706_shape_cast_fp16 = shape(x = linear_208_cast_fp16)[name = string("op_5706_shape_cast_fp16")]; + int32 gather_314_axis_0 = const()[name = string("gather_314_axis_0"), val = int32(0)]; + int32 gather_314_batch_dims_0 = const()[name = string("gather_314_batch_dims_0"), val = int32(0)]; + bool gather_314_validate_indices_0 = const()[name = string("gather_314_validate_indices_0"), val = bool(false)]; + string var_5706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_314_to_uint16 = const()[name = string("select_314_to_uint16"), val = uint16(1)]; + tensor var_5706_shape_cast_fp16_to_uint16 = cast(dtype = var_5706_shape_cast_fp16_to_uint16_dtype_0, x = var_5706_shape_cast_fp16)[name = string("cast_338")]; + uint16 gather_314_cast_uint16 = gather(axis = gather_314_axis_0, batch_dims = gather_314_batch_dims_0, indices = select_314_to_uint16, validate_indices = gather_314_validate_indices_0, x = var_5706_shape_cast_fp16_to_uint16)[name = string("gather_314_cast_uint16")]; + string gather_314_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_314_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_314_cast_uint16_to_int32 = cast(dtype = gather_314_cast_uint16_to_int32_dtype_0, x = gather_314_cast_uint16)[name = string("cast_337")]; + int32 end_step_55 = add(x = offset, y = gather_314_cast_uint16_to_int32)[name = string("end_step_55")]; + tensor expand_dims_416 = const()[name = string("expand_dims_416"), val = tensor([0])]; + tensor expand_dims_418 = const()[name = string("expand_dims_418"), val = tensor([0])]; + tensor expand_dims_419_axes_0 = const()[name = string("expand_dims_419_axes_0"), val = tensor([0])]; + tensor expand_dims_419 = expand_dims(axes = expand_dims_419_axes_0, x = end_step_55)[name = string("expand_dims_419")]; + tensor concat_576_values0_0 = const()[name = string("concat_576_values0_0"), val = tensor([26])]; + int32 concat_576_axis_0 = const()[name = string("concat_576_axis_0"), val = int32(0)]; + bool concat_576_interleave_0 = const()[name = string("concat_576_interleave_0"), val = bool(false)]; + tensor concat_576 = concat(axis = concat_576_axis_0, interleave = concat_576_interleave_0, values = (concat_576_values0_0, expand_dims_416, expand_dims_1, expand_dims_418))[name = string("concat_576")]; + tensor concat_577_values0_0 = const()[name = string("concat_577_values0_0"), val = tensor([0])]; + tensor concat_577_values1_0 = const()[name = string("concat_577_values1_0"), val = tensor([0])]; + tensor concat_577_values3_0 = const()[name = string("concat_577_values3_0"), val = tensor([0])]; + int32 concat_577_axis_0 = const()[name = string("concat_577_axis_0"), val = int32(0)]; + bool concat_577_interleave_0 = const()[name = string("concat_577_interleave_0"), val = bool(false)]; + tensor concat_577 = concat(axis = concat_577_axis_0, interleave = concat_577_interleave_0, values = (concat_577_values0_0, concat_577_values1_0, expand_dims_419, concat_577_values3_0))[name = string("concat_577")]; + tensor k_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = k_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = k_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_27_stride_0, update = linear_209_cast_fp16, x = coreml_update_state_114)[name = string("k_cache1_internal_tensor_assign_27_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_27_cast_fp16, input = k_cache1)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = k_cache1)[name = string("coreml_update_state_116")]; + tensor v_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = v_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = v_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_27_stride_0, update = linear_210_cast_fp16, x = coreml_update_state_115)[name = string("v_cache1_internal_tensor_assign_27_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_27_cast_fp16, input = v_cache1)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = v_cache1)[name = string("coreml_update_state_117")]; + int32 concat_582_values0_0 = const()[name = string("concat_582_values0_0"), val = int32(1)]; + int32 concat_582_values2_0 = const()[name = string("concat_582_values2_0"), val = int32(1280)]; + int32 concat_582_axis_0 = const()[name = string("concat_582_axis_0"), val = int32(0)]; + bool concat_582_interleave_0 = const()[name = string("concat_582_interleave_0"), val = bool(false)]; + tensor concat_582 = concat(axis = concat_582_axis_0, interleave = concat_582_interleave_0, values = (concat_582_values0_0, end_step_55, concat_582_values2_0))[name = string("concat_582")]; + tensor var_5722_begin_0 = const()[name = string("op_5722_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5722_end_mask_0 = const()[name = string("op_5722_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5722_cast_fp16 = slice_by_index(begin = var_5722_begin_0, end = concat_582, end_mask = var_5722_end_mask_0, x = k_cache_105_cast_fp16)[name = string("op_5722_cast_fp16")]; + tensor var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = concat_582, end_mask = var_5725_end_mask_0, x = v_cache_105_cast_fp16)[name = string("op_5725_cast_fp16")]; + tensor concat_584x = const()[name = string("concat_584x"), val = tensor([1, -1, 20, 64])]; + tensor var_5735_cast_fp16 = reshape(shape = concat_584x, x = linear_208_cast_fp16)[name = string("op_5735_cast_fp16")]; + tensor const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_211_cast_fp16 = mul(x = var_5735_cast_fp16, y = const_264_to_fp16)[name = string("q_211_cast_fp16")]; + tensor concat_585x = const()[name = string("concat_585x"), val = tensor([1, -1, 20, 64])]; + tensor var_5742_cast_fp16 = reshape(shape = concat_585x, x = var_5722_cast_fp16)[name = string("op_5742_cast_fp16")]; + tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_265_cast_fp16 = mul(x = var_5742_cast_fp16, y = const_265_to_fp16)[name = string("k_265_cast_fp16")]; + tensor concat_586x = const()[name = string("concat_586x"), val = tensor([1, -1, 20, 64])]; + tensor var_5749_cast_fp16 = reshape(shape = concat_586x, x = var_5725_cast_fp16)[name = string("op_5749_cast_fp16")]; + tensor var_5750 = const()[name = string("op_5750"), val = tensor([0, 2, 1, 3])]; + bool qk_157_transpose_x_0 = const()[name = string("qk_157_transpose_x_0"), val = bool(false)]; + bool qk_157_transpose_y_0 = const()[name = string("qk_157_transpose_y_0"), val = bool(false)]; + tensor transpose_361_perm_0 = const()[name = string("transpose_361_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_362_perm_0 = const()[name = string("transpose_362_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_362 = transpose(perm = transpose_362_perm_0, x = k_265_cast_fp16)[name = string("transpose_430")]; + tensor transpose_361 = transpose(perm = transpose_361_perm_0, x = q_211_cast_fp16)[name = string("transpose_431")]; + tensor qk_157_cast_fp16 = matmul(transpose_x = qk_157_transpose_x_0, transpose_y = qk_157_transpose_y_0, x = transpose_361, y = transpose_362)[name = string("qk_157_cast_fp16")]; + int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(448)]; + int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)]; + bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)]; + tensor concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (gather_314_cast_uint16_to_int32, concat_587_values1_0))[name = string("concat_587")]; + tensor var_5753_begin_0 = const()[name = string("op_5753_begin_0"), val = tensor([0, 0])]; + tensor var_5753_end_mask_0 = const()[name = string("op_5753_end_mask_0"), val = tensor([false, true])]; + tensor var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = concat_587, end_mask = var_5753_end_mask_0, x = mask_to_fp16)[name = string("op_5753_cast_fp16")]; + int32 concat_588_values0_0 = const()[name = string("concat_588_values0_0"), val = int32(0)]; + int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)]; + bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)]; + tensor concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (concat_588_values0_0, gather_314_cast_uint16_to_int32))[name = string("concat_588")]; + tensor var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor([0, 0])]; + tensor var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor([true, false])]; + tensor var_5754_cast_fp16 = slice_by_index(begin = var_5754_begin_0, end = concat_588, end_mask = var_5754_end_mask_0, x = var_5753_cast_fp16)[name = string("op_5754_cast_fp16")]; + tensor qk_159_cast_fp16 = add(x = qk_157_cast_fp16, y = var_5754_cast_fp16)[name = string("qk_159_cast_fp16")]; + tensor var_5757_cast_fp16 = softmax(axis = var_5666, x = qk_159_cast_fp16)[name = string("op_5757_cast_fp16")]; + bool var_5759_transpose_x_0 = const()[name = string("op_5759_transpose_x_0"), val = bool(false)]; + bool var_5759_transpose_y_0 = const()[name = string("op_5759_transpose_y_0"), val = bool(false)]; + tensor v_265_cast_fp16 = transpose(perm = var_5750, x = var_5749_cast_fp16)[name = string("transpose_432")]; + tensor var_5759_cast_fp16 = matmul(transpose_x = var_5759_transpose_x_0, transpose_y = var_5759_transpose_y_0, x = var_5757_cast_fp16, y = v_265_cast_fp16)[name = string("op_5759_cast_fp16")]; + tensor var_5760 = const()[name = string("op_5760"), val = tensor([0, 2, 1, 3])]; + tensor concat_589x = const()[name = string("concat_589x"), val = tensor([1, -1, 1280])]; + tensor var_5761_cast_fp16 = transpose(perm = var_5760, x = var_5759_cast_fp16)[name = string("transpose_429")]; + tensor x_475_cast_fp16 = reshape(shape = concat_589x, x = var_5761_cast_fp16)[name = string("x_475_cast_fp16")]; + tensor var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341861824)))]; + tensor var_5766_to_fp16 = const()[name = string("op_5766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345138688)))]; + tensor linear_211_cast_fp16 = linear(bias = var_5766_to_fp16, weight = var_5765_to_fp16, x = x_475_cast_fp16)[name = string("linear_211_cast_fp16")]; + tensor x_477_cast_fp16 = add(x = x_471_cast_fp16, y = linear_211_cast_fp16)[name = string("x_477_cast_fp16")]; + tensor var_5773_axes_0 = const()[name = string("op_5773_axes_0"), val = tensor([-1])]; + tensor blocks_26_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345141312)))]; + tensor blocks_26_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345143936)))]; + tensor var_5773_cast_fp16 = layer_norm(axes = var_5773_axes_0, beta = blocks_26_cross_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_cross_attn_ln_weight_to_fp16, x = x_477_cast_fp16)[name = string("op_5773_cast_fp16")]; + tensor var_5782_to_fp16 = const()[name = string("op_5782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345146560)))]; + tensor var_5783_to_fp16 = const()[name = string("op_5783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348423424)))]; + tensor linear_212_cast_fp16 = linear(bias = var_5783_to_fp16, weight = var_5782_to_fp16, x = var_5773_cast_fp16)[name = string("linear_212_cast_fp16")]; + tensor concat_590 = const()[name = string("concat_590"), val = tensor([0, 0, 0])]; + tensor concat_591 = const()[name = string("concat_591"), val = tensor([0, 1500, 0])]; + tensor k_267_internal_tensor_assign_1_stride_0 = const()[name = string("k_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_590, begin_mask = k_267_internal_tensor_assign_1_begin_mask_0, end = concat_591, end_mask = k_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_267_internal_tensor_assign_1_squeeze_mask_0, stride = k_267_internal_tensor_assign_1_stride_0, update = k_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("k_267_internal_tensor_assign_1_cast_fp16")]; + tensor concat_592 = const()[name = string("concat_592"), val = tensor([0, 0, 0])]; + tensor concat_593 = const()[name = string("concat_593"), val = tensor([0, 1500, 0])]; + tensor v_267_internal_tensor_assign_1_stride_0 = const()[name = string("v_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_592, begin_mask = v_267_internal_tensor_assign_1_begin_mask_0, end = concat_593, end_mask = v_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_267_internal_tensor_assign_1_squeeze_mask_0, stride = v_267_internal_tensor_assign_1_stride_0, update = v_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("v_267_internal_tensor_assign_1_cast_fp16")]; + tensor concat_594x = const()[name = string("concat_594x"), val = tensor([1, -1, 20, 64])]; + tensor var_5803_cast_fp16 = reshape(shape = concat_594x, x = linear_212_cast_fp16)[name = string("op_5803_cast_fp16")]; + tensor const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_215_cast_fp16 = mul(x = var_5803_cast_fp16, y = const_266_to_fp16)[name = string("q_215_cast_fp16")]; + tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 1500, 20, -1])]; + tensor var_5810_cast_fp16 = reshape(shape = var_5809, x = k_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5810_cast_fp16")]; + tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_269_cast_fp16 = mul(x = var_5810_cast_fp16, y = const_267_to_fp16)[name = string("k_269_cast_fp16")]; + tensor var_5816 = const()[name = string("op_5816"), val = tensor([1, 1500, 20, -1])]; + tensor var_5817_cast_fp16 = reshape(shape = var_5816, x = v_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5817_cast_fp16")]; + tensor var_5818 = const()[name = string("op_5818"), val = tensor([0, 2, 1, 3])]; + bool qk_161_transpose_x_0 = const()[name = string("qk_161_transpose_x_0"), val = bool(false)]; + bool qk_161_transpose_y_0 = const()[name = string("qk_161_transpose_y_0"), val = bool(false)]; + tensor transpose_363_perm_0 = const()[name = string("transpose_363_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_364_perm_0 = const()[name = string("transpose_364_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_364 = transpose(perm = transpose_364_perm_0, x = k_269_cast_fp16)[name = string("transpose_426")]; + tensor transpose_363 = transpose(perm = transpose_363_perm_0, x = q_215_cast_fp16)[name = string("transpose_427")]; + tensor qk_161_cast_fp16 = matmul(transpose_x = qk_161_transpose_x_0, transpose_y = qk_161_transpose_y_0, x = transpose_363, y = transpose_364)[name = string("qk_161_cast_fp16")]; + tensor var_5822_cast_fp16 = softmax(axis = var_5666, x = qk_161_cast_fp16)[name = string("op_5822_cast_fp16")]; + bool var_5824_transpose_x_0 = const()[name = string("op_5824_transpose_x_0"), val = bool(false)]; + bool var_5824_transpose_y_0 = const()[name = string("op_5824_transpose_y_0"), val = bool(false)]; + tensor v_269_cast_fp16 = transpose(perm = var_5818, x = var_5817_cast_fp16)[name = string("transpose_428")]; + tensor var_5824_cast_fp16 = matmul(transpose_x = var_5824_transpose_x_0, transpose_y = var_5824_transpose_y_0, x = var_5822_cast_fp16, y = v_269_cast_fp16)[name = string("op_5824_cast_fp16")]; + tensor var_5825 = const()[name = string("op_5825"), val = tensor([0, 2, 1, 3])]; + tensor concat_595x = const()[name = string("concat_595x"), val = tensor([1, -1, 1280])]; + tensor var_5826_cast_fp16 = transpose(perm = var_5825, x = var_5824_cast_fp16)[name = string("transpose_425")]; + tensor x_481_cast_fp16 = reshape(shape = concat_595x, x = var_5826_cast_fp16)[name = string("x_481_cast_fp16")]; + tensor var_5830_to_fp16 = const()[name = string("op_5830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348426048)))]; + tensor var_5831_to_fp16 = const()[name = string("op_5831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351702912)))]; + tensor linear_213_cast_fp16 = linear(bias = var_5831_to_fp16, weight = var_5830_to_fp16, x = x_481_cast_fp16)[name = string("linear_213_cast_fp16")]; + tensor x_483_cast_fp16 = add(x = x_477_cast_fp16, y = linear_213_cast_fp16)[name = string("x_483_cast_fp16")]; + tensor var_5838_axes_0 = const()[name = string("op_5838_axes_0"), val = tensor([-1])]; + tensor blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351705536)))]; + tensor blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351708160)))]; + tensor var_5838_cast_fp16 = layer_norm(axes = var_5838_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_483_cast_fp16)[name = string("op_5838_cast_fp16")]; + tensor var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351710784)))]; + tensor var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364818048)))]; + tensor linear_214_cast_fp16 = linear(bias = var_5848_to_fp16, weight = var_5847_to_fp16, x = var_5838_cast_fp16)[name = string("linear_214_cast_fp16")]; + string x_487_mode_0 = const()[name = string("x_487_mode_0"), val = string("EXACT")]; + tensor x_487_cast_fp16 = gelu(mode = x_487_mode_0, x = linear_214_cast_fp16)[name = string("x_487_cast_fp16")]; + tensor var_5853_to_fp16 = const()[name = string("op_5853_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364828352)))]; + tensor var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377935616)))]; + tensor linear_215_cast_fp16 = linear(bias = var_5854_to_fp16, weight = var_5853_to_fp16, x = x_487_cast_fp16)[name = string("linear_215_cast_fp16")]; + tensor x_489_cast_fp16 = add(x = x_483_cast_fp16, y = linear_215_cast_fp16)[name = string("x_489_cast_fp16")]; + tensor k_cache_109_begin_0 = const()[name = string("k_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor k_cache_109_end_0 = const()[name = string("k_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; + tensor k_cache_109_end_mask_0 = const()[name = string("k_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_109_squeeze_mask_0 = const()[name = string("k_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_109_cast_fp16 = slice_by_index(begin = k_cache_109_begin_0, end = k_cache_109_end_0, end_mask = k_cache_109_end_mask_0, squeeze_mask = k_cache_109_squeeze_mask_0, x = coreml_update_state_116)[name = string("k_cache_109_cast_fp16")]; + tensor v_cache_109_begin_0 = const()[name = string("v_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor v_cache_109_end_0 = const()[name = string("v_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; + tensor v_cache_109_end_mask_0 = const()[name = string("v_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_109_squeeze_mask_0 = const()[name = string("v_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_109_cast_fp16 = slice_by_index(begin = v_cache_109_begin_0, end = v_cache_109_end_0, end_mask = v_cache_109_end_mask_0, squeeze_mask = v_cache_109_squeeze_mask_0, x = coreml_update_state_117)[name = string("v_cache_109_cast_fp16")]; + tensor k_cache_111_begin_0 = const()[name = string("k_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor k_cache_111_end_0 = const()[name = string("k_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; + tensor k_cache_111_end_mask_0 = const()[name = string("k_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_111_squeeze_mask_0 = const()[name = string("k_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_111_cast_fp16 = slice_by_index(begin = k_cache_111_begin_0, end = k_cache_111_end_0, end_mask = k_cache_111_end_mask_0, squeeze_mask = k_cache_111_squeeze_mask_0, x = read_state_2)[name = string("k_cache_111_cast_fp16")]; + tensor v_cache_111_begin_0 = const()[name = string("v_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor v_cache_111_end_0 = const()[name = string("v_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; + tensor v_cache_111_end_mask_0 = const()[name = string("v_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_111_squeeze_mask_0 = const()[name = string("v_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_111_cast_fp16 = slice_by_index(begin = v_cache_111_begin_0, end = v_cache_111_end_0, end_mask = v_cache_111_end_mask_0, squeeze_mask = v_cache_111_squeeze_mask_0, x = read_state_3)[name = string("v_cache_111_cast_fp16")]; + int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)]; + tensor var_5895_axes_0 = const()[name = string("op_5895_axes_0"), val = tensor([-1])]; + tensor blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377938240)))]; + tensor blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377940864)))]; + fp16 var_5883_to_fp16 = const()[name = string("op_5883_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5895_cast_fp16 = layer_norm(axes = var_5895_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_489_cast_fp16)[name = string("op_5895_cast_fp16")]; + tensor var_5906_to_fp16 = const()[name = string("op_5906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377943488)))]; + tensor var_5907_to_fp16 = const()[name = string("op_5907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381220352)))]; + tensor linear_216_cast_fp16 = linear(bias = var_5907_to_fp16, weight = var_5906_to_fp16, x = var_5895_cast_fp16)[name = string("linear_216_cast_fp16")]; + tensor var_5910_to_fp16 = const()[name = string("op_5910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381222976)))]; + tensor linear_217_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5910_to_fp16, x = var_5895_cast_fp16)[name = string("linear_217_cast_fp16")]; + tensor var_5914_to_fp16 = const()[name = string("op_5914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1384499840)))]; + tensor var_5915_to_fp16 = const()[name = string("op_5915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387776704)))]; + tensor linear_218_cast_fp16 = linear(bias = var_5915_to_fp16, weight = var_5914_to_fp16, x = var_5895_cast_fp16)[name = string("linear_218_cast_fp16")]; + tensor var_5917_shape_cast_fp16 = shape(x = linear_216_cast_fp16)[name = string("op_5917_shape_cast_fp16")]; + int32 gather_326_axis_0 = const()[name = string("gather_326_axis_0"), val = int32(0)]; + int32 gather_326_batch_dims_0 = const()[name = string("gather_326_batch_dims_0"), val = int32(0)]; + bool gather_326_validate_indices_0 = const()[name = string("gather_326_validate_indices_0"), val = bool(false)]; + string var_5917_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5917_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_326_to_uint16 = const()[name = string("select_326_to_uint16"), val = uint16(1)]; + tensor var_5917_shape_cast_fp16_to_uint16 = cast(dtype = var_5917_shape_cast_fp16_to_uint16_dtype_0, x = var_5917_shape_cast_fp16)[name = string("cast_336")]; + uint16 gather_326_cast_uint16 = gather(axis = gather_326_axis_0, batch_dims = gather_326_batch_dims_0, indices = select_326_to_uint16, validate_indices = gather_326_validate_indices_0, x = var_5917_shape_cast_fp16_to_uint16)[name = string("gather_326_cast_uint16")]; + string gather_326_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_326_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_326_cast_uint16_to_int32 = cast(dtype = gather_326_cast_uint16_to_int32_dtype_0, x = gather_326_cast_uint16)[name = string("cast_335")]; + int32 end_step_57 = add(x = offset, y = gather_326_cast_uint16_to_int32)[name = string("end_step_57")]; + tensor expand_dims_432 = const()[name = string("expand_dims_432"), val = tensor([0])]; + tensor expand_dims_434 = const()[name = string("expand_dims_434"), val = tensor([0])]; + tensor expand_dims_435_axes_0 = const()[name = string("expand_dims_435_axes_0"), val = tensor([0])]; + tensor expand_dims_435 = expand_dims(axes = expand_dims_435_axes_0, x = end_step_57)[name = string("expand_dims_435")]; + tensor concat_598_values0_0 = const()[name = string("concat_598_values0_0"), val = tensor([27])]; + int32 concat_598_axis_0 = const()[name = string("concat_598_axis_0"), val = int32(0)]; + bool concat_598_interleave_0 = const()[name = string("concat_598_interleave_0"), val = bool(false)]; + tensor concat_598 = concat(axis = concat_598_axis_0, interleave = concat_598_interleave_0, values = (concat_598_values0_0, expand_dims_432, expand_dims_1, expand_dims_434))[name = string("concat_598")]; + tensor concat_599_values0_0 = const()[name = string("concat_599_values0_0"), val = tensor([0])]; + tensor concat_599_values1_0 = const()[name = string("concat_599_values1_0"), val = tensor([0])]; + tensor concat_599_values3_0 = const()[name = string("concat_599_values3_0"), val = tensor([0])]; + int32 concat_599_axis_0 = const()[name = string("concat_599_axis_0"), val = int32(0)]; + bool concat_599_interleave_0 = const()[name = string("concat_599_interleave_0"), val = bool(false)]; + tensor concat_599 = concat(axis = concat_599_axis_0, interleave = concat_599_interleave_0, values = (concat_599_values0_0, concat_599_values1_0, expand_dims_435, concat_599_values3_0))[name = string("concat_599")]; + tensor k_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = k_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = k_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_28_stride_0, update = linear_217_cast_fp16, x = coreml_update_state_116)[name = string("k_cache1_internal_tensor_assign_28_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_28_cast_fp16, input = k_cache1)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = k_cache1)[name = string("coreml_update_state_118")]; + tensor v_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = v_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = v_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_28_stride_0, update = linear_218_cast_fp16, x = coreml_update_state_117)[name = string("v_cache1_internal_tensor_assign_28_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_28_cast_fp16, input = v_cache1)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = v_cache1)[name = string("coreml_update_state_119")]; + int32 concat_604_values0_0 = const()[name = string("concat_604_values0_0"), val = int32(1)]; + int32 concat_604_values2_0 = const()[name = string("concat_604_values2_0"), val = int32(1280)]; + int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)]; + bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)]; + tensor concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (concat_604_values0_0, end_step_57, concat_604_values2_0))[name = string("concat_604")]; + tensor var_5933_begin_0 = const()[name = string("op_5933_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5933_end_mask_0 = const()[name = string("op_5933_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = concat_604, end_mask = var_5933_end_mask_0, x = k_cache_109_cast_fp16)[name = string("op_5933_cast_fp16")]; + tensor var_5936_begin_0 = const()[name = string("op_5936_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5936_end_mask_0 = const()[name = string("op_5936_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5936_cast_fp16 = slice_by_index(begin = var_5936_begin_0, end = concat_604, end_mask = var_5936_end_mask_0, x = v_cache_109_cast_fp16)[name = string("op_5936_cast_fp16")]; + tensor concat_606x = const()[name = string("concat_606x"), val = tensor([1, -1, 20, 64])]; + tensor var_5946_cast_fp16 = reshape(shape = concat_606x, x = linear_216_cast_fp16)[name = string("op_5946_cast_fp16")]; + tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_219_cast_fp16 = mul(x = var_5946_cast_fp16, y = const_268_to_fp16)[name = string("q_219_cast_fp16")]; + tensor concat_607x = const()[name = string("concat_607x"), val = tensor([1, -1, 20, 64])]; + tensor var_5953_cast_fp16 = reshape(shape = concat_607x, x = var_5933_cast_fp16)[name = string("op_5953_cast_fp16")]; + tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_275_cast_fp16 = mul(x = var_5953_cast_fp16, y = const_269_to_fp16)[name = string("k_275_cast_fp16")]; + tensor concat_608x = const()[name = string("concat_608x"), val = tensor([1, -1, 20, 64])]; + tensor var_5960_cast_fp16 = reshape(shape = concat_608x, x = var_5936_cast_fp16)[name = string("op_5960_cast_fp16")]; + tensor var_5961 = const()[name = string("op_5961"), val = tensor([0, 2, 1, 3])]; + bool qk_163_transpose_x_0 = const()[name = string("qk_163_transpose_x_0"), val = bool(false)]; + bool qk_163_transpose_y_0 = const()[name = string("qk_163_transpose_y_0"), val = bool(false)]; + tensor transpose_365_perm_0 = const()[name = string("transpose_365_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_366_perm_0 = const()[name = string("transpose_366_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_366 = transpose(perm = transpose_366_perm_0, x = k_275_cast_fp16)[name = string("transpose_422")]; + tensor transpose_365 = transpose(perm = transpose_365_perm_0, x = q_219_cast_fp16)[name = string("transpose_423")]; + tensor qk_163_cast_fp16 = matmul(transpose_x = qk_163_transpose_x_0, transpose_y = qk_163_transpose_y_0, x = transpose_365, y = transpose_366)[name = string("qk_163_cast_fp16")]; + int32 concat_609_values1_0 = const()[name = string("concat_609_values1_0"), val = int32(448)]; + int32 concat_609_axis_0 = const()[name = string("concat_609_axis_0"), val = int32(0)]; + bool concat_609_interleave_0 = const()[name = string("concat_609_interleave_0"), val = bool(false)]; + tensor concat_609 = concat(axis = concat_609_axis_0, interleave = concat_609_interleave_0, values = (gather_326_cast_uint16_to_int32, concat_609_values1_0))[name = string("concat_609")]; + tensor var_5964_begin_0 = const()[name = string("op_5964_begin_0"), val = tensor([0, 0])]; + tensor var_5964_end_mask_0 = const()[name = string("op_5964_end_mask_0"), val = tensor([false, true])]; + tensor var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = concat_609, end_mask = var_5964_end_mask_0, x = mask_to_fp16)[name = string("op_5964_cast_fp16")]; + int32 concat_610_values0_0 = const()[name = string("concat_610_values0_0"), val = int32(0)]; + int32 concat_610_axis_0 = const()[name = string("concat_610_axis_0"), val = int32(0)]; + bool concat_610_interleave_0 = const()[name = string("concat_610_interleave_0"), val = bool(false)]; + tensor concat_610 = concat(axis = concat_610_axis_0, interleave = concat_610_interleave_0, values = (concat_610_values0_0, gather_326_cast_uint16_to_int32))[name = string("concat_610")]; + tensor var_5965_begin_0 = const()[name = string("op_5965_begin_0"), val = tensor([0, 0])]; + tensor var_5965_end_mask_0 = const()[name = string("op_5965_end_mask_0"), val = tensor([true, false])]; + tensor var_5965_cast_fp16 = slice_by_index(begin = var_5965_begin_0, end = concat_610, end_mask = var_5965_end_mask_0, x = var_5964_cast_fp16)[name = string("op_5965_cast_fp16")]; + tensor qk_165_cast_fp16 = add(x = qk_163_cast_fp16, y = var_5965_cast_fp16)[name = string("qk_165_cast_fp16")]; + tensor var_5968_cast_fp16 = softmax(axis = var_5877, x = qk_165_cast_fp16)[name = string("op_5968_cast_fp16")]; + bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)]; + bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(false)]; + tensor v_275_cast_fp16 = transpose(perm = var_5961, x = var_5960_cast_fp16)[name = string("transpose_424")]; + tensor var_5970_cast_fp16 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = var_5968_cast_fp16, y = v_275_cast_fp16)[name = string("op_5970_cast_fp16")]; + tensor var_5971 = const()[name = string("op_5971"), val = tensor([0, 2, 1, 3])]; + tensor concat_611x = const()[name = string("concat_611x"), val = tensor([1, -1, 1280])]; + tensor var_5972_cast_fp16 = transpose(perm = var_5971, x = var_5970_cast_fp16)[name = string("transpose_421")]; + tensor x_493_cast_fp16 = reshape(shape = concat_611x, x = var_5972_cast_fp16)[name = string("x_493_cast_fp16")]; + tensor var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387779328)))]; + tensor var_5977_to_fp16 = const()[name = string("op_5977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391056192)))]; + tensor linear_219_cast_fp16 = linear(bias = var_5977_to_fp16, weight = var_5976_to_fp16, x = x_493_cast_fp16)[name = string("linear_219_cast_fp16")]; + tensor x_495_cast_fp16 = add(x = x_489_cast_fp16, y = linear_219_cast_fp16)[name = string("x_495_cast_fp16")]; + tensor var_5984_axes_0 = const()[name = string("op_5984_axes_0"), val = tensor([-1])]; + tensor blocks_27_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391058816)))]; + tensor blocks_27_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391061440)))]; + tensor var_5984_cast_fp16 = layer_norm(axes = var_5984_axes_0, beta = blocks_27_cross_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_cross_attn_ln_weight_to_fp16, x = x_495_cast_fp16)[name = string("op_5984_cast_fp16")]; + tensor var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391064064)))]; + tensor var_5994_to_fp16 = const()[name = string("op_5994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394340928)))]; + tensor linear_220_cast_fp16 = linear(bias = var_5994_to_fp16, weight = var_5993_to_fp16, x = var_5984_cast_fp16)[name = string("linear_220_cast_fp16")]; + tensor concat_612 = const()[name = string("concat_612"), val = tensor([0, 0, 0])]; + tensor concat_613 = const()[name = string("concat_613"), val = tensor([0, 1500, 0])]; + tensor k_277_internal_tensor_assign_1_stride_0 = const()[name = string("k_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_612, begin_mask = k_277_internal_tensor_assign_1_begin_mask_0, end = concat_613, end_mask = k_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_277_internal_tensor_assign_1_squeeze_mask_0, stride = k_277_internal_tensor_assign_1_stride_0, update = k_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("k_277_internal_tensor_assign_1_cast_fp16")]; + tensor concat_614 = const()[name = string("concat_614"), val = tensor([0, 0, 0])]; + tensor concat_615 = const()[name = string("concat_615"), val = tensor([0, 1500, 0])]; + tensor v_277_internal_tensor_assign_1_stride_0 = const()[name = string("v_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_614, begin_mask = v_277_internal_tensor_assign_1_begin_mask_0, end = concat_615, end_mask = v_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_277_internal_tensor_assign_1_squeeze_mask_0, stride = v_277_internal_tensor_assign_1_stride_0, update = v_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("v_277_internal_tensor_assign_1_cast_fp16")]; + tensor concat_616x = const()[name = string("concat_616x"), val = tensor([1, -1, 20, 64])]; + tensor var_6014_cast_fp16 = reshape(shape = concat_616x, x = linear_220_cast_fp16)[name = string("op_6014_cast_fp16")]; + tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_223_cast_fp16 = mul(x = var_6014_cast_fp16, y = const_270_to_fp16)[name = string("q_223_cast_fp16")]; + tensor var_6020 = const()[name = string("op_6020"), val = tensor([1, 1500, 20, -1])]; + tensor var_6021_cast_fp16 = reshape(shape = var_6020, x = k_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6021_cast_fp16")]; + tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_279_cast_fp16 = mul(x = var_6021_cast_fp16, y = const_271_to_fp16)[name = string("k_279_cast_fp16")]; + tensor var_6027 = const()[name = string("op_6027"), val = tensor([1, 1500, 20, -1])]; + tensor var_6028_cast_fp16 = reshape(shape = var_6027, x = v_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6028_cast_fp16")]; + tensor var_6029 = const()[name = string("op_6029"), val = tensor([0, 2, 1, 3])]; + bool qk_167_transpose_x_0 = const()[name = string("qk_167_transpose_x_0"), val = bool(false)]; + bool qk_167_transpose_y_0 = const()[name = string("qk_167_transpose_y_0"), val = bool(false)]; + tensor transpose_367_perm_0 = const()[name = string("transpose_367_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_368_perm_0 = const()[name = string("transpose_368_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_368 = transpose(perm = transpose_368_perm_0, x = k_279_cast_fp16)[name = string("transpose_418")]; + tensor transpose_367 = transpose(perm = transpose_367_perm_0, x = q_223_cast_fp16)[name = string("transpose_419")]; + tensor qk_167_cast_fp16 = matmul(transpose_x = qk_167_transpose_x_0, transpose_y = qk_167_transpose_y_0, x = transpose_367, y = transpose_368)[name = string("qk_167_cast_fp16")]; + tensor var_6033_cast_fp16 = softmax(axis = var_5877, x = qk_167_cast_fp16)[name = string("op_6033_cast_fp16")]; + bool var_6035_transpose_x_0 = const()[name = string("op_6035_transpose_x_0"), val = bool(false)]; + bool var_6035_transpose_y_0 = const()[name = string("op_6035_transpose_y_0"), val = bool(false)]; + tensor v_279_cast_fp16 = transpose(perm = var_6029, x = var_6028_cast_fp16)[name = string("transpose_420")]; + tensor var_6035_cast_fp16 = matmul(transpose_x = var_6035_transpose_x_0, transpose_y = var_6035_transpose_y_0, x = var_6033_cast_fp16, y = v_279_cast_fp16)[name = string("op_6035_cast_fp16")]; + tensor var_6036 = const()[name = string("op_6036"), val = tensor([0, 2, 1, 3])]; + tensor concat_617x = const()[name = string("concat_617x"), val = tensor([1, -1, 1280])]; + tensor var_6037_cast_fp16 = transpose(perm = var_6036, x = var_6035_cast_fp16)[name = string("transpose_417")]; + tensor x_499_cast_fp16 = reshape(shape = concat_617x, x = var_6037_cast_fp16)[name = string("x_499_cast_fp16")]; + tensor var_6041_to_fp16 = const()[name = string("op_6041_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394343552)))]; + tensor var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397620416)))]; + tensor linear_221_cast_fp16 = linear(bias = var_6042_to_fp16, weight = var_6041_to_fp16, x = x_499_cast_fp16)[name = string("linear_221_cast_fp16")]; + tensor x_501_cast_fp16 = add(x = x_495_cast_fp16, y = linear_221_cast_fp16)[name = string("x_501_cast_fp16")]; + tensor var_6049_axes_0 = const()[name = string("op_6049_axes_0"), val = tensor([-1])]; + tensor blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397623040)))]; + tensor blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397625664)))]; + tensor var_6049_cast_fp16 = layer_norm(axes = var_6049_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_501_cast_fp16)[name = string("op_6049_cast_fp16")]; + tensor var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397628288)))]; + tensor var_6059_to_fp16 = const()[name = string("op_6059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410735552)))]; + tensor linear_222_cast_fp16 = linear(bias = var_6059_to_fp16, weight = var_6058_to_fp16, x = var_6049_cast_fp16)[name = string("linear_222_cast_fp16")]; + string x_505_mode_0 = const()[name = string("x_505_mode_0"), val = string("EXACT")]; + tensor x_505_cast_fp16 = gelu(mode = x_505_mode_0, x = linear_222_cast_fp16)[name = string("x_505_cast_fp16")]; + tensor var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410745856)))]; + tensor var_6065_to_fp16 = const()[name = string("op_6065_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423853120)))]; + tensor linear_223_cast_fp16 = linear(bias = var_6065_to_fp16, weight = var_6064_to_fp16, x = x_505_cast_fp16)[name = string("linear_223_cast_fp16")]; + tensor x_507_cast_fp16 = add(x = x_501_cast_fp16, y = linear_223_cast_fp16)[name = string("x_507_cast_fp16")]; + tensor k_cache_113_begin_0 = const()[name = string("k_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor k_cache_113_end_0 = const()[name = string("k_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; + tensor k_cache_113_end_mask_0 = const()[name = string("k_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_113_squeeze_mask_0 = const()[name = string("k_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_113_cast_fp16 = slice_by_index(begin = k_cache_113_begin_0, end = k_cache_113_end_0, end_mask = k_cache_113_end_mask_0, squeeze_mask = k_cache_113_squeeze_mask_0, x = coreml_update_state_118)[name = string("k_cache_113_cast_fp16")]; + tensor v_cache_113_begin_0 = const()[name = string("v_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor v_cache_113_end_0 = const()[name = string("v_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; + tensor v_cache_113_end_mask_0 = const()[name = string("v_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_113_squeeze_mask_0 = const()[name = string("v_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_113_cast_fp16 = slice_by_index(begin = v_cache_113_begin_0, end = v_cache_113_end_0, end_mask = v_cache_113_end_mask_0, squeeze_mask = v_cache_113_squeeze_mask_0, x = coreml_update_state_119)[name = string("v_cache_113_cast_fp16")]; + tensor k_cache_115_begin_0 = const()[name = string("k_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor k_cache_115_end_0 = const()[name = string("k_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; + tensor k_cache_115_end_mask_0 = const()[name = string("k_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_115_squeeze_mask_0 = const()[name = string("k_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_115_cast_fp16 = slice_by_index(begin = k_cache_115_begin_0, end = k_cache_115_end_0, end_mask = k_cache_115_end_mask_0, squeeze_mask = k_cache_115_squeeze_mask_0, x = read_state_2)[name = string("k_cache_115_cast_fp16")]; + tensor v_cache_115_begin_0 = const()[name = string("v_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor v_cache_115_end_0 = const()[name = string("v_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; + tensor v_cache_115_end_mask_0 = const()[name = string("v_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_115_squeeze_mask_0 = const()[name = string("v_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_115_cast_fp16 = slice_by_index(begin = v_cache_115_begin_0, end = v_cache_115_end_0, end_mask = v_cache_115_end_mask_0, squeeze_mask = v_cache_115_squeeze_mask_0, x = read_state_3)[name = string("v_cache_115_cast_fp16")]; + int32 var_6088 = const()[name = string("op_6088"), val = int32(-1)]; + tensor var_6106_axes_0 = const()[name = string("op_6106_axes_0"), val = tensor([-1])]; + tensor blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423855744)))]; + tensor blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423858368)))]; + fp16 var_6094_to_fp16 = const()[name = string("op_6094_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6106_cast_fp16 = layer_norm(axes = var_6106_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_507_cast_fp16)[name = string("op_6106_cast_fp16")]; + tensor var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423860992)))]; + tensor var_6118_to_fp16 = const()[name = string("op_6118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427137856)))]; + tensor linear_224_cast_fp16 = linear(bias = var_6118_to_fp16, weight = var_6117_to_fp16, x = var_6106_cast_fp16)[name = string("linear_224_cast_fp16")]; + tensor var_6121_to_fp16 = const()[name = string("op_6121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427140480)))]; + tensor linear_225_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6121_to_fp16, x = var_6106_cast_fp16)[name = string("linear_225_cast_fp16")]; + tensor var_6125_to_fp16 = const()[name = string("op_6125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430417344)))]; + tensor var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433694208)))]; + tensor linear_226_cast_fp16 = linear(bias = var_6126_to_fp16, weight = var_6125_to_fp16, x = var_6106_cast_fp16)[name = string("linear_226_cast_fp16")]; + tensor var_6128_shape_cast_fp16 = shape(x = linear_224_cast_fp16)[name = string("op_6128_shape_cast_fp16")]; + int32 gather_338_axis_0 = const()[name = string("gather_338_axis_0"), val = int32(0)]; + int32 gather_338_batch_dims_0 = const()[name = string("gather_338_batch_dims_0"), val = int32(0)]; + bool gather_338_validate_indices_0 = const()[name = string("gather_338_validate_indices_0"), val = bool(false)]; + string var_6128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_338_to_uint16 = const()[name = string("select_338_to_uint16"), val = uint16(1)]; + tensor var_6128_shape_cast_fp16_to_uint16 = cast(dtype = var_6128_shape_cast_fp16_to_uint16_dtype_0, x = var_6128_shape_cast_fp16)[name = string("cast_334")]; + uint16 gather_338_cast_uint16 = gather(axis = gather_338_axis_0, batch_dims = gather_338_batch_dims_0, indices = select_338_to_uint16, validate_indices = gather_338_validate_indices_0, x = var_6128_shape_cast_fp16_to_uint16)[name = string("gather_338_cast_uint16")]; + string gather_338_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_338_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_338_cast_uint16_to_int32 = cast(dtype = gather_338_cast_uint16_to_int32_dtype_0, x = gather_338_cast_uint16)[name = string("cast_333")]; + int32 end_step_59 = add(x = offset, y = gather_338_cast_uint16_to_int32)[name = string("end_step_59")]; + tensor expand_dims_448 = const()[name = string("expand_dims_448"), val = tensor([0])]; + tensor expand_dims_450 = const()[name = string("expand_dims_450"), val = tensor([0])]; + tensor expand_dims_451_axes_0 = const()[name = string("expand_dims_451_axes_0"), val = tensor([0])]; + tensor expand_dims_451 = expand_dims(axes = expand_dims_451_axes_0, x = end_step_59)[name = string("expand_dims_451")]; + tensor concat_620_values0_0 = const()[name = string("concat_620_values0_0"), val = tensor([28])]; + int32 concat_620_axis_0 = const()[name = string("concat_620_axis_0"), val = int32(0)]; + bool concat_620_interleave_0 = const()[name = string("concat_620_interleave_0"), val = bool(false)]; + tensor concat_620 = concat(axis = concat_620_axis_0, interleave = concat_620_interleave_0, values = (concat_620_values0_0, expand_dims_448, expand_dims_1, expand_dims_450))[name = string("concat_620")]; + tensor concat_621_values0_0 = const()[name = string("concat_621_values0_0"), val = tensor([0])]; + tensor concat_621_values1_0 = const()[name = string("concat_621_values1_0"), val = tensor([0])]; + tensor concat_621_values3_0 = const()[name = string("concat_621_values3_0"), val = tensor([0])]; + int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)]; + bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)]; + tensor concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (concat_621_values0_0, concat_621_values1_0, expand_dims_451, concat_621_values3_0))[name = string("concat_621")]; + tensor k_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = k_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = k_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_29_stride_0, update = linear_225_cast_fp16, x = coreml_update_state_118)[name = string("k_cache1_internal_tensor_assign_29_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_29_cast_fp16, input = k_cache1)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_120 = read_state(input = k_cache1)[name = string("coreml_update_state_120")]; + tensor v_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = v_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = v_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_29_stride_0, update = linear_226_cast_fp16, x = coreml_update_state_119)[name = string("v_cache1_internal_tensor_assign_29_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_29_cast_fp16, input = v_cache1)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_121 = read_state(input = v_cache1)[name = string("coreml_update_state_121")]; + int32 concat_626_values0_0 = const()[name = string("concat_626_values0_0"), val = int32(1)]; + int32 concat_626_values2_0 = const()[name = string("concat_626_values2_0"), val = int32(1280)]; + int32 concat_626_axis_0 = const()[name = string("concat_626_axis_0"), val = int32(0)]; + bool concat_626_interleave_0 = const()[name = string("concat_626_interleave_0"), val = bool(false)]; + tensor concat_626 = concat(axis = concat_626_axis_0, interleave = concat_626_interleave_0, values = (concat_626_values0_0, end_step_59, concat_626_values2_0))[name = string("concat_626")]; + tensor var_6144_begin_0 = const()[name = string("op_6144_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6144_end_mask_0 = const()[name = string("op_6144_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6144_cast_fp16 = slice_by_index(begin = var_6144_begin_0, end = concat_626, end_mask = var_6144_end_mask_0, x = k_cache_113_cast_fp16)[name = string("op_6144_cast_fp16")]; + tensor var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = concat_626, end_mask = var_6147_end_mask_0, x = v_cache_113_cast_fp16)[name = string("op_6147_cast_fp16")]; + tensor concat_628x = const()[name = string("concat_628x"), val = tensor([1, -1, 20, 64])]; + tensor var_6157_cast_fp16 = reshape(shape = concat_628x, x = linear_224_cast_fp16)[name = string("op_6157_cast_fp16")]; + tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_227_cast_fp16 = mul(x = var_6157_cast_fp16, y = const_272_to_fp16)[name = string("q_227_cast_fp16")]; + tensor concat_629x = const()[name = string("concat_629x"), val = tensor([1, -1, 20, 64])]; + tensor var_6164_cast_fp16 = reshape(shape = concat_629x, x = var_6144_cast_fp16)[name = string("op_6164_cast_fp16")]; + tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_285_cast_fp16 = mul(x = var_6164_cast_fp16, y = const_273_to_fp16)[name = string("k_285_cast_fp16")]; + tensor concat_630x = const()[name = string("concat_630x"), val = tensor([1, -1, 20, 64])]; + tensor var_6171_cast_fp16 = reshape(shape = concat_630x, x = var_6147_cast_fp16)[name = string("op_6171_cast_fp16")]; + tensor var_6172 = const()[name = string("op_6172"), val = tensor([0, 2, 1, 3])]; + bool qk_169_transpose_x_0 = const()[name = string("qk_169_transpose_x_0"), val = bool(false)]; + bool qk_169_transpose_y_0 = const()[name = string("qk_169_transpose_y_0"), val = bool(false)]; + tensor transpose_369_perm_0 = const()[name = string("transpose_369_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_370_perm_0 = const()[name = string("transpose_370_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_370 = transpose(perm = transpose_370_perm_0, x = k_285_cast_fp16)[name = string("transpose_414")]; + tensor transpose_369 = transpose(perm = transpose_369_perm_0, x = q_227_cast_fp16)[name = string("transpose_415")]; + tensor qk_169_cast_fp16 = matmul(transpose_x = qk_169_transpose_x_0, transpose_y = qk_169_transpose_y_0, x = transpose_369, y = transpose_370)[name = string("qk_169_cast_fp16")]; + int32 concat_631_values1_0 = const()[name = string("concat_631_values1_0"), val = int32(448)]; + int32 concat_631_axis_0 = const()[name = string("concat_631_axis_0"), val = int32(0)]; + bool concat_631_interleave_0 = const()[name = string("concat_631_interleave_0"), val = bool(false)]; + tensor concat_631 = concat(axis = concat_631_axis_0, interleave = concat_631_interleave_0, values = (gather_338_cast_uint16_to_int32, concat_631_values1_0))[name = string("concat_631")]; + tensor var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor([0, 0])]; + tensor var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor([false, true])]; + tensor var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = concat_631, end_mask = var_6175_end_mask_0, x = mask_to_fp16)[name = string("op_6175_cast_fp16")]; + int32 concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = int32(0)]; + int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)]; + bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)]; + tensor concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, gather_338_cast_uint16_to_int32))[name = string("concat_632")]; + tensor var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor([0, 0])]; + tensor var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor([true, false])]; + tensor var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = concat_632, end_mask = var_6176_end_mask_0, x = var_6175_cast_fp16)[name = string("op_6176_cast_fp16")]; + tensor qk_171_cast_fp16 = add(x = qk_169_cast_fp16, y = var_6176_cast_fp16)[name = string("qk_171_cast_fp16")]; + tensor var_6179_cast_fp16 = softmax(axis = var_6088, x = qk_171_cast_fp16)[name = string("op_6179_cast_fp16")]; + bool var_6181_transpose_x_0 = const()[name = string("op_6181_transpose_x_0"), val = bool(false)]; + bool var_6181_transpose_y_0 = const()[name = string("op_6181_transpose_y_0"), val = bool(false)]; + tensor v_285_cast_fp16 = transpose(perm = var_6172, x = var_6171_cast_fp16)[name = string("transpose_416")]; + tensor var_6181_cast_fp16 = matmul(transpose_x = var_6181_transpose_x_0, transpose_y = var_6181_transpose_y_0, x = var_6179_cast_fp16, y = v_285_cast_fp16)[name = string("op_6181_cast_fp16")]; + tensor var_6182 = const()[name = string("op_6182"), val = tensor([0, 2, 1, 3])]; + tensor concat_633x = const()[name = string("concat_633x"), val = tensor([1, -1, 1280])]; + tensor var_6183_cast_fp16 = transpose(perm = var_6182, x = var_6181_cast_fp16)[name = string("transpose_413")]; + tensor x_511_cast_fp16 = reshape(shape = concat_633x, x = var_6183_cast_fp16)[name = string("x_511_cast_fp16")]; + tensor var_6187_to_fp16 = const()[name = string("op_6187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433696832)))]; + tensor var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436973696)))]; + tensor linear_227_cast_fp16 = linear(bias = var_6188_to_fp16, weight = var_6187_to_fp16, x = x_511_cast_fp16)[name = string("linear_227_cast_fp16")]; + tensor x_513_cast_fp16 = add(x = x_507_cast_fp16, y = linear_227_cast_fp16)[name = string("x_513_cast_fp16")]; + tensor var_6195_axes_0 = const()[name = string("op_6195_axes_0"), val = tensor([-1])]; + tensor blocks_28_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436976320)))]; + tensor blocks_28_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436978944)))]; + tensor var_6195_cast_fp16 = layer_norm(axes = var_6195_axes_0, beta = blocks_28_cross_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_cross_attn_ln_weight_to_fp16, x = x_513_cast_fp16)[name = string("op_6195_cast_fp16")]; + tensor var_6204_to_fp16 = const()[name = string("op_6204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436981568)))]; + tensor var_6205_to_fp16 = const()[name = string("op_6205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440258432)))]; + tensor linear_228_cast_fp16 = linear(bias = var_6205_to_fp16, weight = var_6204_to_fp16, x = var_6195_cast_fp16)[name = string("linear_228_cast_fp16")]; + tensor concat_634 = const()[name = string("concat_634"), val = tensor([0, 0, 0])]; + tensor concat_635 = const()[name = string("concat_635"), val = tensor([0, 1500, 0])]; + tensor k_287_internal_tensor_assign_1_stride_0 = const()[name = string("k_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_634, begin_mask = k_287_internal_tensor_assign_1_begin_mask_0, end = concat_635, end_mask = k_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_287_internal_tensor_assign_1_squeeze_mask_0, stride = k_287_internal_tensor_assign_1_stride_0, update = k_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("k_287_internal_tensor_assign_1_cast_fp16")]; + tensor concat_636 = const()[name = string("concat_636"), val = tensor([0, 0, 0])]; + tensor concat_637 = const()[name = string("concat_637"), val = tensor([0, 1500, 0])]; + tensor v_287_internal_tensor_assign_1_stride_0 = const()[name = string("v_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_636, begin_mask = v_287_internal_tensor_assign_1_begin_mask_0, end = concat_637, end_mask = v_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_287_internal_tensor_assign_1_squeeze_mask_0, stride = v_287_internal_tensor_assign_1_stride_0, update = v_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("v_287_internal_tensor_assign_1_cast_fp16")]; + tensor concat_638x = const()[name = string("concat_638x"), val = tensor([1, -1, 20, 64])]; + tensor var_6225_cast_fp16 = reshape(shape = concat_638x, x = linear_228_cast_fp16)[name = string("op_6225_cast_fp16")]; + tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_231_cast_fp16 = mul(x = var_6225_cast_fp16, y = const_274_to_fp16)[name = string("q_231_cast_fp16")]; + tensor var_6231 = const()[name = string("op_6231"), val = tensor([1, 1500, 20, -1])]; + tensor var_6232_cast_fp16 = reshape(shape = var_6231, x = k_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6232_cast_fp16")]; + tensor const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_289_cast_fp16 = mul(x = var_6232_cast_fp16, y = const_275_to_fp16)[name = string("k_289_cast_fp16")]; + tensor var_6238 = const()[name = string("op_6238"), val = tensor([1, 1500, 20, -1])]; + tensor var_6239_cast_fp16 = reshape(shape = var_6238, x = v_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6239_cast_fp16")]; + tensor var_6240 = const()[name = string("op_6240"), val = tensor([0, 2, 1, 3])]; + bool qk_173_transpose_x_0 = const()[name = string("qk_173_transpose_x_0"), val = bool(false)]; + bool qk_173_transpose_y_0 = const()[name = string("qk_173_transpose_y_0"), val = bool(false)]; + tensor transpose_371_perm_0 = const()[name = string("transpose_371_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_372_perm_0 = const()[name = string("transpose_372_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_372 = transpose(perm = transpose_372_perm_0, x = k_289_cast_fp16)[name = string("transpose_410")]; + tensor transpose_371 = transpose(perm = transpose_371_perm_0, x = q_231_cast_fp16)[name = string("transpose_411")]; + tensor qk_173_cast_fp16 = matmul(transpose_x = qk_173_transpose_x_0, transpose_y = qk_173_transpose_y_0, x = transpose_371, y = transpose_372)[name = string("qk_173_cast_fp16")]; + tensor var_6244_cast_fp16 = softmax(axis = var_6088, x = qk_173_cast_fp16)[name = string("op_6244_cast_fp16")]; + bool var_6246_transpose_x_0 = const()[name = string("op_6246_transpose_x_0"), val = bool(false)]; + bool var_6246_transpose_y_0 = const()[name = string("op_6246_transpose_y_0"), val = bool(false)]; + tensor v_289_cast_fp16 = transpose(perm = var_6240, x = var_6239_cast_fp16)[name = string("transpose_412")]; + tensor var_6246_cast_fp16 = matmul(transpose_x = var_6246_transpose_x_0, transpose_y = var_6246_transpose_y_0, x = var_6244_cast_fp16, y = v_289_cast_fp16)[name = string("op_6246_cast_fp16")]; + tensor var_6247 = const()[name = string("op_6247"), val = tensor([0, 2, 1, 3])]; + tensor concat_639x = const()[name = string("concat_639x"), val = tensor([1, -1, 1280])]; + tensor var_6248_cast_fp16 = transpose(perm = var_6247, x = var_6246_cast_fp16)[name = string("transpose_409")]; + tensor x_517_cast_fp16 = reshape(shape = concat_639x, x = var_6248_cast_fp16)[name = string("x_517_cast_fp16")]; + tensor var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440261056)))]; + tensor var_6253_to_fp16 = const()[name = string("op_6253_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443537920)))]; + tensor linear_229_cast_fp16 = linear(bias = var_6253_to_fp16, weight = var_6252_to_fp16, x = x_517_cast_fp16)[name = string("linear_229_cast_fp16")]; + tensor x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_229_cast_fp16)[name = string("x_519_cast_fp16")]; + tensor var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor([-1])]; + tensor blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443540544)))]; + tensor blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443543168)))]; + tensor var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_519_cast_fp16)[name = string("op_6260_cast_fp16")]; + tensor var_6269_to_fp16 = const()[name = string("op_6269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443545792)))]; + tensor var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456653056)))]; + tensor linear_230_cast_fp16 = linear(bias = var_6270_to_fp16, weight = var_6269_to_fp16, x = var_6260_cast_fp16)[name = string("linear_230_cast_fp16")]; + string x_523_mode_0 = const()[name = string("x_523_mode_0"), val = string("EXACT")]; + tensor x_523_cast_fp16 = gelu(mode = x_523_mode_0, x = linear_230_cast_fp16)[name = string("x_523_cast_fp16")]; + tensor var_6275_to_fp16 = const()[name = string("op_6275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456663360)))]; + tensor var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469770624)))]; + tensor linear_231_cast_fp16 = linear(bias = var_6276_to_fp16, weight = var_6275_to_fp16, x = x_523_cast_fp16)[name = string("linear_231_cast_fp16")]; + tensor x_525_cast_fp16 = add(x = x_519_cast_fp16, y = linear_231_cast_fp16)[name = string("x_525_cast_fp16")]; + tensor k_cache_117_begin_0 = const()[name = string("k_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor k_cache_117_end_0 = const()[name = string("k_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; + tensor k_cache_117_end_mask_0 = const()[name = string("k_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_117_squeeze_mask_0 = const()[name = string("k_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_117_cast_fp16 = slice_by_index(begin = k_cache_117_begin_0, end = k_cache_117_end_0, end_mask = k_cache_117_end_mask_0, squeeze_mask = k_cache_117_squeeze_mask_0, x = coreml_update_state_120)[name = string("k_cache_117_cast_fp16")]; + tensor v_cache_117_begin_0 = const()[name = string("v_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor v_cache_117_end_0 = const()[name = string("v_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; + tensor v_cache_117_end_mask_0 = const()[name = string("v_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_117_squeeze_mask_0 = const()[name = string("v_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_117_cast_fp16 = slice_by_index(begin = v_cache_117_begin_0, end = v_cache_117_end_0, end_mask = v_cache_117_end_mask_0, squeeze_mask = v_cache_117_squeeze_mask_0, x = coreml_update_state_121)[name = string("v_cache_117_cast_fp16")]; + tensor k_cache_119_begin_0 = const()[name = string("k_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor k_cache_119_end_0 = const()[name = string("k_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; + tensor k_cache_119_end_mask_0 = const()[name = string("k_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_119_squeeze_mask_0 = const()[name = string("k_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_119_cast_fp16 = slice_by_index(begin = k_cache_119_begin_0, end = k_cache_119_end_0, end_mask = k_cache_119_end_mask_0, squeeze_mask = k_cache_119_squeeze_mask_0, x = read_state_2)[name = string("k_cache_119_cast_fp16")]; + tensor v_cache_119_begin_0 = const()[name = string("v_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor v_cache_119_end_0 = const()[name = string("v_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; + tensor v_cache_119_end_mask_0 = const()[name = string("v_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_119_squeeze_mask_0 = const()[name = string("v_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_119_cast_fp16 = slice_by_index(begin = v_cache_119_begin_0, end = v_cache_119_end_0, end_mask = v_cache_119_end_mask_0, squeeze_mask = v_cache_119_squeeze_mask_0, x = read_state_3)[name = string("v_cache_119_cast_fp16")]; + int32 var_6299 = const()[name = string("op_6299"), val = int32(-1)]; + tensor var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor([-1])]; + tensor blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469773248)))]; + tensor blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469775872)))]; + fp16 var_6305_to_fp16 = const()[name = string("op_6305_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6317_cast_fp16 = layer_norm(axes = var_6317_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_525_cast_fp16)[name = string("op_6317_cast_fp16")]; + tensor var_6328_to_fp16 = const()[name = string("op_6328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469778496)))]; + tensor var_6329_to_fp16 = const()[name = string("op_6329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473055360)))]; + tensor linear_232_cast_fp16 = linear(bias = var_6329_to_fp16, weight = var_6328_to_fp16, x = var_6317_cast_fp16)[name = string("linear_232_cast_fp16")]; + tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473057984)))]; + tensor linear_233_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6332_to_fp16, x = var_6317_cast_fp16)[name = string("linear_233_cast_fp16")]; + tensor var_6336_to_fp16 = const()[name = string("op_6336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1476334848)))]; + tensor var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479611712)))]; + tensor linear_234_cast_fp16 = linear(bias = var_6337_to_fp16, weight = var_6336_to_fp16, x = var_6317_cast_fp16)[name = string("linear_234_cast_fp16")]; + tensor var_6339_shape_cast_fp16 = shape(x = linear_232_cast_fp16)[name = string("op_6339_shape_cast_fp16")]; + int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)]; + int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)]; + bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)]; + string var_6339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_350_to_uint16 = const()[name = string("select_350_to_uint16"), val = uint16(1)]; + tensor var_6339_shape_cast_fp16_to_uint16 = cast(dtype = var_6339_shape_cast_fp16_to_uint16_dtype_0, x = var_6339_shape_cast_fp16)[name = string("cast_332")]; + uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = select_350_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_6339_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")]; + string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_331")]; + int32 end_step_61 = add(x = offset, y = gather_350_cast_uint16_to_int32)[name = string("end_step_61")]; + tensor expand_dims_464 = const()[name = string("expand_dims_464"), val = tensor([0])]; + tensor expand_dims_466 = const()[name = string("expand_dims_466"), val = tensor([0])]; + tensor expand_dims_467_axes_0 = const()[name = string("expand_dims_467_axes_0"), val = tensor([0])]; + tensor expand_dims_467 = expand_dims(axes = expand_dims_467_axes_0, x = end_step_61)[name = string("expand_dims_467")]; + tensor concat_642_values0_0 = const()[name = string("concat_642_values0_0"), val = tensor([29])]; + int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)]; + bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)]; + tensor concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (concat_642_values0_0, expand_dims_464, expand_dims_1, expand_dims_466))[name = string("concat_642")]; + tensor concat_643_values0_0 = const()[name = string("concat_643_values0_0"), val = tensor([0])]; + tensor concat_643_values1_0 = const()[name = string("concat_643_values1_0"), val = tensor([0])]; + tensor concat_643_values3_0 = const()[name = string("concat_643_values3_0"), val = tensor([0])]; + int32 concat_643_axis_0 = const()[name = string("concat_643_axis_0"), val = int32(0)]; + bool concat_643_interleave_0 = const()[name = string("concat_643_interleave_0"), val = bool(false)]; + tensor concat_643 = concat(axis = concat_643_axis_0, interleave = concat_643_interleave_0, values = (concat_643_values0_0, concat_643_values1_0, expand_dims_467, concat_643_values3_0))[name = string("concat_643")]; + tensor k_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = k_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = k_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_30_stride_0, update = linear_233_cast_fp16, x = coreml_update_state_120)[name = string("k_cache1_internal_tensor_assign_30_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_30_cast_fp16, input = k_cache1)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_122 = read_state(input = k_cache1)[name = string("coreml_update_state_122")]; + tensor v_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = v_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = v_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_30_stride_0, update = linear_234_cast_fp16, x = coreml_update_state_121)[name = string("v_cache1_internal_tensor_assign_30_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_30_cast_fp16, input = v_cache1)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_123 = read_state(input = v_cache1)[name = string("coreml_update_state_123")]; + int32 concat_648_values0_0 = const()[name = string("concat_648_values0_0"), val = int32(1)]; + int32 concat_648_values2_0 = const()[name = string("concat_648_values2_0"), val = int32(1280)]; + int32 concat_648_axis_0 = const()[name = string("concat_648_axis_0"), val = int32(0)]; + bool concat_648_interleave_0 = const()[name = string("concat_648_interleave_0"), val = bool(false)]; + tensor concat_648 = concat(axis = concat_648_axis_0, interleave = concat_648_interleave_0, values = (concat_648_values0_0, end_step_61, concat_648_values2_0))[name = string("concat_648")]; + tensor var_6355_begin_0 = const()[name = string("op_6355_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6355_end_mask_0 = const()[name = string("op_6355_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = concat_648, end_mask = var_6355_end_mask_0, x = k_cache_117_cast_fp16)[name = string("op_6355_cast_fp16")]; + tensor var_6358_begin_0 = const()[name = string("op_6358_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6358_end_mask_0 = const()[name = string("op_6358_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6358_cast_fp16 = slice_by_index(begin = var_6358_begin_0, end = concat_648, end_mask = var_6358_end_mask_0, x = v_cache_117_cast_fp16)[name = string("op_6358_cast_fp16")]; + tensor concat_650x = const()[name = string("concat_650x"), val = tensor([1, -1, 20, 64])]; + tensor var_6368_cast_fp16 = reshape(shape = concat_650x, x = linear_232_cast_fp16)[name = string("op_6368_cast_fp16")]; + tensor const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_235_cast_fp16 = mul(x = var_6368_cast_fp16, y = const_276_to_fp16)[name = string("q_235_cast_fp16")]; + tensor concat_651x = const()[name = string("concat_651x"), val = tensor([1, -1, 20, 64])]; + tensor var_6375_cast_fp16 = reshape(shape = concat_651x, x = var_6355_cast_fp16)[name = string("op_6375_cast_fp16")]; + tensor const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_295_cast_fp16 = mul(x = var_6375_cast_fp16, y = const_277_to_fp16)[name = string("k_295_cast_fp16")]; + tensor concat_652x = const()[name = string("concat_652x"), val = tensor([1, -1, 20, 64])]; + tensor var_6382_cast_fp16 = reshape(shape = concat_652x, x = var_6358_cast_fp16)[name = string("op_6382_cast_fp16")]; + tensor var_6383 = const()[name = string("op_6383"), val = tensor([0, 2, 1, 3])]; + bool qk_175_transpose_x_0 = const()[name = string("qk_175_transpose_x_0"), val = bool(false)]; + bool qk_175_transpose_y_0 = const()[name = string("qk_175_transpose_y_0"), val = bool(false)]; + tensor transpose_373_perm_0 = const()[name = string("transpose_373_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_374_perm_0 = const()[name = string("transpose_374_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_374 = transpose(perm = transpose_374_perm_0, x = k_295_cast_fp16)[name = string("transpose_406")]; + tensor transpose_373 = transpose(perm = transpose_373_perm_0, x = q_235_cast_fp16)[name = string("transpose_407")]; + tensor qk_175_cast_fp16 = matmul(transpose_x = qk_175_transpose_x_0, transpose_y = qk_175_transpose_y_0, x = transpose_373, y = transpose_374)[name = string("qk_175_cast_fp16")]; + int32 concat_653_values1_0 = const()[name = string("concat_653_values1_0"), val = int32(448)]; + int32 concat_653_axis_0 = const()[name = string("concat_653_axis_0"), val = int32(0)]; + bool concat_653_interleave_0 = const()[name = string("concat_653_interleave_0"), val = bool(false)]; + tensor concat_653 = concat(axis = concat_653_axis_0, interleave = concat_653_interleave_0, values = (gather_350_cast_uint16_to_int32, concat_653_values1_0))[name = string("concat_653")]; + tensor var_6386_begin_0 = const()[name = string("op_6386_begin_0"), val = tensor([0, 0])]; + tensor var_6386_end_mask_0 = const()[name = string("op_6386_end_mask_0"), val = tensor([false, true])]; + tensor var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = concat_653, end_mask = var_6386_end_mask_0, x = mask_to_fp16)[name = string("op_6386_cast_fp16")]; + int32 concat_654_values0_0 = const()[name = string("concat_654_values0_0"), val = int32(0)]; + int32 concat_654_axis_0 = const()[name = string("concat_654_axis_0"), val = int32(0)]; + bool concat_654_interleave_0 = const()[name = string("concat_654_interleave_0"), val = bool(false)]; + tensor concat_654 = concat(axis = concat_654_axis_0, interleave = concat_654_interleave_0, values = (concat_654_values0_0, gather_350_cast_uint16_to_int32))[name = string("concat_654")]; + tensor var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor([0, 0])]; + tensor var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor([true, false])]; + tensor var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = concat_654, end_mask = var_6387_end_mask_0, x = var_6386_cast_fp16)[name = string("op_6387_cast_fp16")]; + tensor qk_177_cast_fp16 = add(x = qk_175_cast_fp16, y = var_6387_cast_fp16)[name = string("qk_177_cast_fp16")]; + tensor var_6390_cast_fp16 = softmax(axis = var_6299, x = qk_177_cast_fp16)[name = string("op_6390_cast_fp16")]; + bool var_6392_transpose_x_0 = const()[name = string("op_6392_transpose_x_0"), val = bool(false)]; + bool var_6392_transpose_y_0 = const()[name = string("op_6392_transpose_y_0"), val = bool(false)]; + tensor v_295_cast_fp16 = transpose(perm = var_6383, x = var_6382_cast_fp16)[name = string("transpose_408")]; + tensor var_6392_cast_fp16 = matmul(transpose_x = var_6392_transpose_x_0, transpose_y = var_6392_transpose_y_0, x = var_6390_cast_fp16, y = v_295_cast_fp16)[name = string("op_6392_cast_fp16")]; + tensor var_6393 = const()[name = string("op_6393"), val = tensor([0, 2, 1, 3])]; + tensor concat_655x = const()[name = string("concat_655x"), val = tensor([1, -1, 1280])]; + tensor var_6394_cast_fp16 = transpose(perm = var_6393, x = var_6392_cast_fp16)[name = string("transpose_405")]; + tensor x_529_cast_fp16 = reshape(shape = concat_655x, x = var_6394_cast_fp16)[name = string("x_529_cast_fp16")]; + tensor var_6398_to_fp16 = const()[name = string("op_6398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479614336)))]; + tensor var_6399_to_fp16 = const()[name = string("op_6399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482891200)))]; + tensor linear_235_cast_fp16 = linear(bias = var_6399_to_fp16, weight = var_6398_to_fp16, x = x_529_cast_fp16)[name = string("linear_235_cast_fp16")]; + tensor x_531_cast_fp16 = add(x = x_525_cast_fp16, y = linear_235_cast_fp16)[name = string("x_531_cast_fp16")]; + tensor var_6406_axes_0 = const()[name = string("op_6406_axes_0"), val = tensor([-1])]; + tensor blocks_29_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482893824)))]; + tensor blocks_29_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482896448)))]; + tensor var_6406_cast_fp16 = layer_norm(axes = var_6406_axes_0, beta = blocks_29_cross_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_cross_attn_ln_weight_to_fp16, x = x_531_cast_fp16)[name = string("op_6406_cast_fp16")]; + tensor var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482899072)))]; + tensor var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486175936)))]; + tensor linear_236_cast_fp16 = linear(bias = var_6416_to_fp16, weight = var_6415_to_fp16, x = var_6406_cast_fp16)[name = string("linear_236_cast_fp16")]; + tensor concat_656 = const()[name = string("concat_656"), val = tensor([0, 0, 0])]; + tensor concat_657 = const()[name = string("concat_657"), val = tensor([0, 1500, 0])]; + tensor k_297_internal_tensor_assign_1_stride_0 = const()[name = string("k_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_656, begin_mask = k_297_internal_tensor_assign_1_begin_mask_0, end = concat_657, end_mask = k_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_297_internal_tensor_assign_1_squeeze_mask_0, stride = k_297_internal_tensor_assign_1_stride_0, update = k_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("k_297_internal_tensor_assign_1_cast_fp16")]; + tensor concat_658 = const()[name = string("concat_658"), val = tensor([0, 0, 0])]; + tensor concat_659 = const()[name = string("concat_659"), val = tensor([0, 1500, 0])]; + tensor v_297_internal_tensor_assign_1_stride_0 = const()[name = string("v_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_658, begin_mask = v_297_internal_tensor_assign_1_begin_mask_0, end = concat_659, end_mask = v_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_297_internal_tensor_assign_1_squeeze_mask_0, stride = v_297_internal_tensor_assign_1_stride_0, update = v_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("v_297_internal_tensor_assign_1_cast_fp16")]; + tensor concat_660x = const()[name = string("concat_660x"), val = tensor([1, -1, 20, 64])]; + tensor var_6436_cast_fp16 = reshape(shape = concat_660x, x = linear_236_cast_fp16)[name = string("op_6436_cast_fp16")]; + tensor const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_239_cast_fp16 = mul(x = var_6436_cast_fp16, y = const_278_to_fp16)[name = string("q_239_cast_fp16")]; + tensor var_6442 = const()[name = string("op_6442"), val = tensor([1, 1500, 20, -1])]; + tensor var_6443_cast_fp16 = reshape(shape = var_6442, x = k_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6443_cast_fp16")]; + tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_299_cast_fp16 = mul(x = var_6443_cast_fp16, y = const_279_to_fp16)[name = string("k_299_cast_fp16")]; + tensor var_6449 = const()[name = string("op_6449"), val = tensor([1, 1500, 20, -1])]; + tensor var_6450_cast_fp16 = reshape(shape = var_6449, x = v_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6450_cast_fp16")]; + tensor var_6451 = const()[name = string("op_6451"), val = tensor([0, 2, 1, 3])]; + bool qk_179_transpose_x_0 = const()[name = string("qk_179_transpose_x_0"), val = bool(false)]; + bool qk_179_transpose_y_0 = const()[name = string("qk_179_transpose_y_0"), val = bool(false)]; + tensor transpose_375_perm_0 = const()[name = string("transpose_375_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_376_perm_0 = const()[name = string("transpose_376_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_376 = transpose(perm = transpose_376_perm_0, x = k_299_cast_fp16)[name = string("transpose_402")]; + tensor transpose_375 = transpose(perm = transpose_375_perm_0, x = q_239_cast_fp16)[name = string("transpose_403")]; + tensor qk_179_cast_fp16 = matmul(transpose_x = qk_179_transpose_x_0, transpose_y = qk_179_transpose_y_0, x = transpose_375, y = transpose_376)[name = string("qk_179_cast_fp16")]; + tensor var_6455_cast_fp16 = softmax(axis = var_6299, x = qk_179_cast_fp16)[name = string("op_6455_cast_fp16")]; + bool var_6457_transpose_x_0 = const()[name = string("op_6457_transpose_x_0"), val = bool(false)]; + bool var_6457_transpose_y_0 = const()[name = string("op_6457_transpose_y_0"), val = bool(false)]; + tensor v_299_cast_fp16 = transpose(perm = var_6451, x = var_6450_cast_fp16)[name = string("transpose_404")]; + tensor var_6457_cast_fp16 = matmul(transpose_x = var_6457_transpose_x_0, transpose_y = var_6457_transpose_y_0, x = var_6455_cast_fp16, y = v_299_cast_fp16)[name = string("op_6457_cast_fp16")]; + tensor var_6458 = const()[name = string("op_6458"), val = tensor([0, 2, 1, 3])]; + tensor concat_661x = const()[name = string("concat_661x"), val = tensor([1, -1, 1280])]; + tensor var_6459_cast_fp16 = transpose(perm = var_6458, x = var_6457_cast_fp16)[name = string("transpose_401")]; + tensor x_535_cast_fp16 = reshape(shape = concat_661x, x = var_6459_cast_fp16)[name = string("x_535_cast_fp16")]; + tensor var_6463_to_fp16 = const()[name = string("op_6463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486178560)))]; + tensor var_6464_to_fp16 = const()[name = string("op_6464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489455424)))]; + tensor linear_237_cast_fp16 = linear(bias = var_6464_to_fp16, weight = var_6463_to_fp16, x = x_535_cast_fp16)[name = string("linear_237_cast_fp16")]; + tensor x_537_cast_fp16 = add(x = x_531_cast_fp16, y = linear_237_cast_fp16)[name = string("x_537_cast_fp16")]; + tensor var_6471_axes_0 = const()[name = string("op_6471_axes_0"), val = tensor([-1])]; + tensor blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489458048)))]; + tensor blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489460672)))]; + tensor var_6471_cast_fp16 = layer_norm(axes = var_6471_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_537_cast_fp16)[name = string("op_6471_cast_fp16")]; + tensor var_6480_to_fp16 = const()[name = string("op_6480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489463296)))]; + tensor var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502570560)))]; + tensor linear_238_cast_fp16 = linear(bias = var_6481_to_fp16, weight = var_6480_to_fp16, x = var_6471_cast_fp16)[name = string("linear_238_cast_fp16")]; + string x_541_mode_0 = const()[name = string("x_541_mode_0"), val = string("EXACT")]; + tensor x_541_cast_fp16 = gelu(mode = x_541_mode_0, x = linear_238_cast_fp16)[name = string("x_541_cast_fp16")]; + tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502580864)))]; + tensor var_6487_to_fp16 = const()[name = string("op_6487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515688128)))]; + tensor linear_239_cast_fp16 = linear(bias = var_6487_to_fp16, weight = var_6486_to_fp16, x = x_541_cast_fp16)[name = string("linear_239_cast_fp16")]; + tensor x_543_cast_fp16 = add(x = x_537_cast_fp16, y = linear_239_cast_fp16)[name = string("x_543_cast_fp16")]; + tensor k_cache_121_begin_0 = const()[name = string("k_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor k_cache_121_end_0 = const()[name = string("k_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; + tensor k_cache_121_end_mask_0 = const()[name = string("k_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_121_squeeze_mask_0 = const()[name = string("k_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_121_cast_fp16 = slice_by_index(begin = k_cache_121_begin_0, end = k_cache_121_end_0, end_mask = k_cache_121_end_mask_0, squeeze_mask = k_cache_121_squeeze_mask_0, x = coreml_update_state_122)[name = string("k_cache_121_cast_fp16")]; + tensor v_cache_121_begin_0 = const()[name = string("v_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor v_cache_121_end_0 = const()[name = string("v_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; + tensor v_cache_121_end_mask_0 = const()[name = string("v_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_121_squeeze_mask_0 = const()[name = string("v_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_121_cast_fp16 = slice_by_index(begin = v_cache_121_begin_0, end = v_cache_121_end_0, end_mask = v_cache_121_end_mask_0, squeeze_mask = v_cache_121_squeeze_mask_0, x = coreml_update_state_123)[name = string("v_cache_121_cast_fp16")]; + tensor k_cache_123_begin_0 = const()[name = string("k_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor k_cache_123_end_0 = const()[name = string("k_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; + tensor k_cache_123_end_mask_0 = const()[name = string("k_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_123_squeeze_mask_0 = const()[name = string("k_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_123_cast_fp16 = slice_by_index(begin = k_cache_123_begin_0, end = k_cache_123_end_0, end_mask = k_cache_123_end_mask_0, squeeze_mask = k_cache_123_squeeze_mask_0, x = read_state_2)[name = string("k_cache_123_cast_fp16")]; + tensor v_cache_123_begin_0 = const()[name = string("v_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor v_cache_123_end_0 = const()[name = string("v_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; + tensor v_cache_123_end_mask_0 = const()[name = string("v_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_123_squeeze_mask_0 = const()[name = string("v_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_123_cast_fp16 = slice_by_index(begin = v_cache_123_begin_0, end = v_cache_123_end_0, end_mask = v_cache_123_end_mask_0, squeeze_mask = v_cache_123_squeeze_mask_0, x = read_state_3)[name = string("v_cache_123_cast_fp16")]; + int32 var_6510 = const()[name = string("op_6510"), val = int32(-1)]; + tensor var_6528_axes_0 = const()[name = string("op_6528_axes_0"), val = tensor([-1])]; + tensor blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515690752)))]; + tensor blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515693376)))]; + fp16 var_6516_to_fp16 = const()[name = string("op_6516_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6528_cast_fp16 = layer_norm(axes = var_6528_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_543_cast_fp16)[name = string("op_6528_cast_fp16")]; + tensor var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515696000)))]; + tensor var_6540_to_fp16 = const()[name = string("op_6540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518972864)))]; + tensor linear_240_cast_fp16 = linear(bias = var_6540_to_fp16, weight = var_6539_to_fp16, x = var_6528_cast_fp16)[name = string("linear_240_cast_fp16")]; + tensor var_6543_to_fp16 = const()[name = string("op_6543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518975488)))]; + tensor linear_241_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6543_to_fp16, x = var_6528_cast_fp16)[name = string("linear_241_cast_fp16")]; + tensor var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1522252352)))]; + tensor var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525529216)))]; + tensor linear_242_cast_fp16 = linear(bias = var_6548_to_fp16, weight = var_6547_to_fp16, x = var_6528_cast_fp16)[name = string("linear_242_cast_fp16")]; + tensor var_6550_shape_cast_fp16 = shape(x = linear_240_cast_fp16)[name = string("op_6550_shape_cast_fp16")]; + int32 gather_362_axis_0 = const()[name = string("gather_362_axis_0"), val = int32(0)]; + int32 gather_362_batch_dims_0 = const()[name = string("gather_362_batch_dims_0"), val = int32(0)]; + bool gather_362_validate_indices_0 = const()[name = string("gather_362_validate_indices_0"), val = bool(false)]; + string var_6550_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6550_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_362_to_uint16 = const()[name = string("select_362_to_uint16"), val = uint16(1)]; + tensor var_6550_shape_cast_fp16_to_uint16 = cast(dtype = var_6550_shape_cast_fp16_to_uint16_dtype_0, x = var_6550_shape_cast_fp16)[name = string("cast_330")]; + uint16 gather_362_cast_uint16 = gather(axis = gather_362_axis_0, batch_dims = gather_362_batch_dims_0, indices = select_362_to_uint16, validate_indices = gather_362_validate_indices_0, x = var_6550_shape_cast_fp16_to_uint16)[name = string("gather_362_cast_uint16")]; + string gather_362_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_362_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_362_cast_uint16_to_int32 = cast(dtype = gather_362_cast_uint16_to_int32_dtype_0, x = gather_362_cast_uint16)[name = string("cast_329")]; + int32 end_step_63 = add(x = offset, y = gather_362_cast_uint16_to_int32)[name = string("end_step_63")]; + tensor expand_dims_480 = const()[name = string("expand_dims_480"), val = tensor([0])]; + tensor expand_dims_482 = const()[name = string("expand_dims_482"), val = tensor([0])]; + tensor expand_dims_483_axes_0 = const()[name = string("expand_dims_483_axes_0"), val = tensor([0])]; + tensor expand_dims_483 = expand_dims(axes = expand_dims_483_axes_0, x = end_step_63)[name = string("expand_dims_483")]; + tensor concat_664_values0_0 = const()[name = string("concat_664_values0_0"), val = tensor([30])]; + int32 concat_664_axis_0 = const()[name = string("concat_664_axis_0"), val = int32(0)]; + bool concat_664_interleave_0 = const()[name = string("concat_664_interleave_0"), val = bool(false)]; + tensor concat_664 = concat(axis = concat_664_axis_0, interleave = concat_664_interleave_0, values = (concat_664_values0_0, expand_dims_480, expand_dims_1, expand_dims_482))[name = string("concat_664")]; + tensor concat_665_values0_0 = const()[name = string("concat_665_values0_0"), val = tensor([0])]; + tensor concat_665_values1_0 = const()[name = string("concat_665_values1_0"), val = tensor([0])]; + tensor concat_665_values3_0 = const()[name = string("concat_665_values3_0"), val = tensor([0])]; + int32 concat_665_axis_0 = const()[name = string("concat_665_axis_0"), val = int32(0)]; + bool concat_665_interleave_0 = const()[name = string("concat_665_interleave_0"), val = bool(false)]; + tensor concat_665 = concat(axis = concat_665_axis_0, interleave = concat_665_interleave_0, values = (concat_665_values0_0, concat_665_values1_0, expand_dims_483, concat_665_values3_0))[name = string("concat_665")]; + tensor k_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = k_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = k_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_31_stride_0, update = linear_241_cast_fp16, x = coreml_update_state_122)[name = string("k_cache1_internal_tensor_assign_31_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_31_cast_fp16, input = k_cache1)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_124 = read_state(input = k_cache1)[name = string("coreml_update_state_124")]; + tensor v_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = v_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = v_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_31_stride_0, update = linear_242_cast_fp16, x = coreml_update_state_123)[name = string("v_cache1_internal_tensor_assign_31_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_31_cast_fp16, input = v_cache1)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_125 = read_state(input = v_cache1)[name = string("coreml_update_state_125")]; + int32 concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = int32(1)]; + int32 concat_670_values2_0 = const()[name = string("concat_670_values2_0"), val = int32(1280)]; + int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)]; + bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)]; + tensor concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, end_step_63, concat_670_values2_0))[name = string("concat_670")]; + tensor var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = concat_670, end_mask = var_6566_end_mask_0, x = k_cache_121_cast_fp16)[name = string("op_6566_cast_fp16")]; + tensor var_6569_begin_0 = const()[name = string("op_6569_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6569_end_mask_0 = const()[name = string("op_6569_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6569_cast_fp16 = slice_by_index(begin = var_6569_begin_0, end = concat_670, end_mask = var_6569_end_mask_0, x = v_cache_121_cast_fp16)[name = string("op_6569_cast_fp16")]; + tensor concat_672x = const()[name = string("concat_672x"), val = tensor([1, -1, 20, 64])]; + tensor var_6579_cast_fp16 = reshape(shape = concat_672x, x = linear_240_cast_fp16)[name = string("op_6579_cast_fp16")]; + tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_243_cast_fp16 = mul(x = var_6579_cast_fp16, y = const_280_to_fp16)[name = string("q_243_cast_fp16")]; + tensor concat_673x = const()[name = string("concat_673x"), val = tensor([1, -1, 20, 64])]; + tensor var_6586_cast_fp16 = reshape(shape = concat_673x, x = var_6566_cast_fp16)[name = string("op_6586_cast_fp16")]; + tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_305_cast_fp16 = mul(x = var_6586_cast_fp16, y = const_281_to_fp16)[name = string("k_305_cast_fp16")]; + tensor concat_674x = const()[name = string("concat_674x"), val = tensor([1, -1, 20, 64])]; + tensor var_6593_cast_fp16 = reshape(shape = concat_674x, x = var_6569_cast_fp16)[name = string("op_6593_cast_fp16")]; + tensor var_6594 = const()[name = string("op_6594"), val = tensor([0, 2, 1, 3])]; + bool qk_181_transpose_x_0 = const()[name = string("qk_181_transpose_x_0"), val = bool(false)]; + bool qk_181_transpose_y_0 = const()[name = string("qk_181_transpose_y_0"), val = bool(false)]; + tensor transpose_377_perm_0 = const()[name = string("transpose_377_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_378_perm_0 = const()[name = string("transpose_378_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_378 = transpose(perm = transpose_378_perm_0, x = k_305_cast_fp16)[name = string("transpose_398")]; + tensor transpose_377 = transpose(perm = transpose_377_perm_0, x = q_243_cast_fp16)[name = string("transpose_399")]; + tensor qk_181_cast_fp16 = matmul(transpose_x = qk_181_transpose_x_0, transpose_y = qk_181_transpose_y_0, x = transpose_377, y = transpose_378)[name = string("qk_181_cast_fp16")]; + int32 concat_675_values1_0 = const()[name = string("concat_675_values1_0"), val = int32(448)]; + int32 concat_675_axis_0 = const()[name = string("concat_675_axis_0"), val = int32(0)]; + bool concat_675_interleave_0 = const()[name = string("concat_675_interleave_0"), val = bool(false)]; + tensor concat_675 = concat(axis = concat_675_axis_0, interleave = concat_675_interleave_0, values = (gather_362_cast_uint16_to_int32, concat_675_values1_0))[name = string("concat_675")]; + tensor var_6597_begin_0 = const()[name = string("op_6597_begin_0"), val = tensor([0, 0])]; + tensor var_6597_end_mask_0 = const()[name = string("op_6597_end_mask_0"), val = tensor([false, true])]; + tensor var_6597_cast_fp16 = slice_by_index(begin = var_6597_begin_0, end = concat_675, end_mask = var_6597_end_mask_0, x = mask_to_fp16)[name = string("op_6597_cast_fp16")]; + int32 concat_676_values0_0 = const()[name = string("concat_676_values0_0"), val = int32(0)]; + int32 concat_676_axis_0 = const()[name = string("concat_676_axis_0"), val = int32(0)]; + bool concat_676_interleave_0 = const()[name = string("concat_676_interleave_0"), val = bool(false)]; + tensor concat_676 = concat(axis = concat_676_axis_0, interleave = concat_676_interleave_0, values = (concat_676_values0_0, gather_362_cast_uint16_to_int32))[name = string("concat_676")]; + tensor var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor([0, 0])]; + tensor var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor([true, false])]; + tensor var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = concat_676, end_mask = var_6598_end_mask_0, x = var_6597_cast_fp16)[name = string("op_6598_cast_fp16")]; + tensor qk_183_cast_fp16 = add(x = qk_181_cast_fp16, y = var_6598_cast_fp16)[name = string("qk_183_cast_fp16")]; + tensor var_6601_cast_fp16 = softmax(axis = var_6510, x = qk_183_cast_fp16)[name = string("op_6601_cast_fp16")]; + bool var_6603_transpose_x_0 = const()[name = string("op_6603_transpose_x_0"), val = bool(false)]; + bool var_6603_transpose_y_0 = const()[name = string("op_6603_transpose_y_0"), val = bool(false)]; + tensor v_305_cast_fp16 = transpose(perm = var_6594, x = var_6593_cast_fp16)[name = string("transpose_400")]; + tensor var_6603_cast_fp16 = matmul(transpose_x = var_6603_transpose_x_0, transpose_y = var_6603_transpose_y_0, x = var_6601_cast_fp16, y = v_305_cast_fp16)[name = string("op_6603_cast_fp16")]; + tensor var_6604 = const()[name = string("op_6604"), val = tensor([0, 2, 1, 3])]; + tensor concat_677x = const()[name = string("concat_677x"), val = tensor([1, -1, 1280])]; + tensor var_6605_cast_fp16 = transpose(perm = var_6604, x = var_6603_cast_fp16)[name = string("transpose_397")]; + tensor x_547_cast_fp16 = reshape(shape = concat_677x, x = var_6605_cast_fp16)[name = string("x_547_cast_fp16")]; + tensor var_6609_to_fp16 = const()[name = string("op_6609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525531840)))]; + tensor var_6610_to_fp16 = const()[name = string("op_6610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528808704)))]; + tensor linear_243_cast_fp16 = linear(bias = var_6610_to_fp16, weight = var_6609_to_fp16, x = x_547_cast_fp16)[name = string("linear_243_cast_fp16")]; + tensor x_549_cast_fp16 = add(x = x_543_cast_fp16, y = linear_243_cast_fp16)[name = string("x_549_cast_fp16")]; + tensor var_6617_axes_0 = const()[name = string("op_6617_axes_0"), val = tensor([-1])]; + tensor blocks_30_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528811328)))]; + tensor blocks_30_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528813952)))]; + tensor var_6617_cast_fp16 = layer_norm(axes = var_6617_axes_0, beta = blocks_30_cross_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_cross_attn_ln_weight_to_fp16, x = x_549_cast_fp16)[name = string("op_6617_cast_fp16")]; + tensor var_6626_to_fp16 = const()[name = string("op_6626_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528816576)))]; + tensor var_6627_to_fp16 = const()[name = string("op_6627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532093440)))]; + tensor linear_244_cast_fp16 = linear(bias = var_6627_to_fp16, weight = var_6626_to_fp16, x = var_6617_cast_fp16)[name = string("linear_244_cast_fp16")]; + tensor concat_678 = const()[name = string("concat_678"), val = tensor([0, 0, 0])]; + tensor concat_679 = const()[name = string("concat_679"), val = tensor([0, 1500, 0])]; + tensor k_307_internal_tensor_assign_1_stride_0 = const()[name = string("k_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_678, begin_mask = k_307_internal_tensor_assign_1_begin_mask_0, end = concat_679, end_mask = k_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_307_internal_tensor_assign_1_squeeze_mask_0, stride = k_307_internal_tensor_assign_1_stride_0, update = k_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("k_307_internal_tensor_assign_1_cast_fp16")]; + tensor concat_680 = const()[name = string("concat_680"), val = tensor([0, 0, 0])]; + tensor concat_681 = const()[name = string("concat_681"), val = tensor([0, 1500, 0])]; + tensor v_307_internal_tensor_assign_1_stride_0 = const()[name = string("v_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_680, begin_mask = v_307_internal_tensor_assign_1_begin_mask_0, end = concat_681, end_mask = v_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_307_internal_tensor_assign_1_squeeze_mask_0, stride = v_307_internal_tensor_assign_1_stride_0, update = v_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("v_307_internal_tensor_assign_1_cast_fp16")]; + tensor concat_682x = const()[name = string("concat_682x"), val = tensor([1, -1, 20, 64])]; + tensor var_6647_cast_fp16 = reshape(shape = concat_682x, x = linear_244_cast_fp16)[name = string("op_6647_cast_fp16")]; + tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_247_cast_fp16 = mul(x = var_6647_cast_fp16, y = const_282_to_fp16)[name = string("q_247_cast_fp16")]; + tensor var_6653 = const()[name = string("op_6653"), val = tensor([1, 1500, 20, -1])]; + tensor var_6654_cast_fp16 = reshape(shape = var_6653, x = k_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6654_cast_fp16")]; + tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_309_cast_fp16 = mul(x = var_6654_cast_fp16, y = const_283_to_fp16)[name = string("k_309_cast_fp16")]; + tensor var_6660 = const()[name = string("op_6660"), val = tensor([1, 1500, 20, -1])]; + tensor var_6661_cast_fp16 = reshape(shape = var_6660, x = v_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6661_cast_fp16")]; + tensor var_6662 = const()[name = string("op_6662"), val = tensor([0, 2, 1, 3])]; + bool qk_185_transpose_x_0 = const()[name = string("qk_185_transpose_x_0"), val = bool(false)]; + bool qk_185_transpose_y_0 = const()[name = string("qk_185_transpose_y_0"), val = bool(false)]; + tensor transpose_379_perm_0 = const()[name = string("transpose_379_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_380_perm_0 = const()[name = string("transpose_380_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_380 = transpose(perm = transpose_380_perm_0, x = k_309_cast_fp16)[name = string("transpose_394")]; + tensor transpose_379 = transpose(perm = transpose_379_perm_0, x = q_247_cast_fp16)[name = string("transpose_395")]; + tensor qk_185_cast_fp16 = matmul(transpose_x = qk_185_transpose_x_0, transpose_y = qk_185_transpose_y_0, x = transpose_379, y = transpose_380)[name = string("qk_185_cast_fp16")]; + tensor var_6666_cast_fp16 = softmax(axis = var_6510, x = qk_185_cast_fp16)[name = string("op_6666_cast_fp16")]; + bool var_6668_transpose_x_0 = const()[name = string("op_6668_transpose_x_0"), val = bool(false)]; + bool var_6668_transpose_y_0 = const()[name = string("op_6668_transpose_y_0"), val = bool(false)]; + tensor v_309_cast_fp16 = transpose(perm = var_6662, x = var_6661_cast_fp16)[name = string("transpose_396")]; + tensor var_6668_cast_fp16 = matmul(transpose_x = var_6668_transpose_x_0, transpose_y = var_6668_transpose_y_0, x = var_6666_cast_fp16, y = v_309_cast_fp16)[name = string("op_6668_cast_fp16")]; + tensor var_6669 = const()[name = string("op_6669"), val = tensor([0, 2, 1, 3])]; + tensor concat_683x = const()[name = string("concat_683x"), val = tensor([1, -1, 1280])]; + tensor var_6670_cast_fp16 = transpose(perm = var_6669, x = var_6668_cast_fp16)[name = string("transpose_393")]; + tensor x_553_cast_fp16 = reshape(shape = concat_683x, x = var_6670_cast_fp16)[name = string("x_553_cast_fp16")]; + tensor var_6674_to_fp16 = const()[name = string("op_6674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532096064)))]; + tensor var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535372928)))]; + tensor linear_245_cast_fp16 = linear(bias = var_6675_to_fp16, weight = var_6674_to_fp16, x = x_553_cast_fp16)[name = string("linear_245_cast_fp16")]; + tensor x_555_cast_fp16 = add(x = x_549_cast_fp16, y = linear_245_cast_fp16)[name = string("x_555_cast_fp16")]; + tensor var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor([-1])]; + tensor blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535375552)))]; + tensor blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535378176)))]; + tensor var_6682_cast_fp16 = layer_norm(axes = var_6682_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_555_cast_fp16)[name = string("op_6682_cast_fp16")]; + tensor var_6691_to_fp16 = const()[name = string("op_6691_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535380800)))]; + tensor var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548488064)))]; + tensor linear_246_cast_fp16 = linear(bias = var_6692_to_fp16, weight = var_6691_to_fp16, x = var_6682_cast_fp16)[name = string("linear_246_cast_fp16")]; + string x_559_mode_0 = const()[name = string("x_559_mode_0"), val = string("EXACT")]; + tensor x_559_cast_fp16 = gelu(mode = x_559_mode_0, x = linear_246_cast_fp16)[name = string("x_559_cast_fp16")]; + tensor var_6697_to_fp16 = const()[name = string("op_6697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548498368)))]; + tensor var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561605632)))]; + tensor linear_247_cast_fp16 = linear(bias = var_6698_to_fp16, weight = var_6697_to_fp16, x = x_559_cast_fp16)[name = string("linear_247_cast_fp16")]; + tensor x_561_cast_fp16 = add(x = x_555_cast_fp16, y = linear_247_cast_fp16)[name = string("x_561_cast_fp16")]; + tensor k_cache_125_begin_0 = const()[name = string("k_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor k_cache_125_end_0 = const()[name = string("k_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; + tensor k_cache_125_end_mask_0 = const()[name = string("k_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_125_squeeze_mask_0 = const()[name = string("k_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_125_cast_fp16 = slice_by_index(begin = k_cache_125_begin_0, end = k_cache_125_end_0, end_mask = k_cache_125_end_mask_0, squeeze_mask = k_cache_125_squeeze_mask_0, x = coreml_update_state_124)[name = string("k_cache_125_cast_fp16")]; + tensor v_cache_125_begin_0 = const()[name = string("v_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor v_cache_125_end_0 = const()[name = string("v_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; + tensor v_cache_125_end_mask_0 = const()[name = string("v_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_125_squeeze_mask_0 = const()[name = string("v_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_125_cast_fp16 = slice_by_index(begin = v_cache_125_begin_0, end = v_cache_125_end_0, end_mask = v_cache_125_end_mask_0, squeeze_mask = v_cache_125_squeeze_mask_0, x = coreml_update_state_125)[name = string("v_cache_125_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_6721 = const()[name = string("op_6721"), val = int32(-1)]; + tensor var_6739_axes_0 = const()[name = string("op_6739_axes_0"), val = tensor([-1])]; + tensor blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561608256)))]; + tensor blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561610880)))]; + fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6739_cast_fp16 = layer_norm(axes = var_6739_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_561_cast_fp16)[name = string("op_6739_cast_fp16")]; + tensor var_6750_to_fp16 = const()[name = string("op_6750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561613504)))]; + tensor var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564890368)))]; + tensor linear_248_cast_fp16 = linear(bias = var_6751_to_fp16, weight = var_6750_to_fp16, x = var_6739_cast_fp16)[name = string("linear_248_cast_fp16")]; + tensor var_6754_to_fp16 = const()[name = string("op_6754_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564892992)))]; + tensor linear_249_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6754_to_fp16, x = var_6739_cast_fp16)[name = string("linear_249_cast_fp16")]; + tensor var_6758_to_fp16 = const()[name = string("op_6758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1568169856)))]; + tensor var_6759_to_fp16 = const()[name = string("op_6759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571446720)))]; + tensor linear_250_cast_fp16 = linear(bias = var_6759_to_fp16, weight = var_6758_to_fp16, x = var_6739_cast_fp16)[name = string("linear_250_cast_fp16")]; + tensor var_6761_shape_cast_fp16 = shape(x = linear_248_cast_fp16)[name = string("op_6761_shape_cast_fp16")]; + int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)]; + int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)]; + bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)]; + string var_6761_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6761_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_374_to_uint16 = const()[name = string("select_374_to_uint16"), val = uint16(1)]; + tensor var_6761_shape_cast_fp16_to_uint16 = cast(dtype = var_6761_shape_cast_fp16_to_uint16_dtype_0, x = var_6761_shape_cast_fp16)[name = string("cast_328")]; + uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = select_374_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_6761_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")]; + string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_327")]; + int32 end_step = add(x = offset, y = gather_374_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_496 = const()[name = string("expand_dims_496"), val = tensor([0])]; + tensor expand_dims_498 = const()[name = string("expand_dims_498"), val = tensor([0])]; + tensor expand_dims_499_axes_0 = const()[name = string("expand_dims_499_axes_0"), val = tensor([0])]; + tensor expand_dims_499 = expand_dims(axes = expand_dims_499_axes_0, x = end_step)[name = string("expand_dims_499")]; + tensor concat_686_values0_0 = const()[name = string("concat_686_values0_0"), val = tensor([31])]; + int32 concat_686_axis_0 = const()[name = string("concat_686_axis_0"), val = int32(0)]; + bool concat_686_interleave_0 = const()[name = string("concat_686_interleave_0"), val = bool(false)]; + tensor concat_686 = concat(axis = concat_686_axis_0, interleave = concat_686_interleave_0, values = (concat_686_values0_0, expand_dims_496, expand_dims_1, expand_dims_498))[name = string("concat_686")]; + tensor concat_687_values0_0 = const()[name = string("concat_687_values0_0"), val = tensor([0])]; + tensor concat_687_values1_0 = const()[name = string("concat_687_values1_0"), val = tensor([0])]; + tensor concat_687_values3_0 = const()[name = string("concat_687_values3_0"), val = tensor([0])]; + int32 concat_687_axis_0 = const()[name = string("concat_687_axis_0"), val = int32(0)]; + bool concat_687_interleave_0 = const()[name = string("concat_687_interleave_0"), val = bool(false)]; + tensor concat_687 = concat(axis = concat_687_axis_0, interleave = concat_687_interleave_0, values = (concat_687_values0_0, concat_687_values1_0, expand_dims_499, concat_687_values3_0))[name = string("concat_687")]; + tensor k_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = k_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = k_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_32_stride_0, update = linear_249_cast_fp16, x = coreml_update_state_124)[name = string("k_cache1_internal_tensor_assign_32_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_32_cast_fp16, input = k_cache1)[name = string("coreml_update_state_126_write_state")]; + tensor v_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = v_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = v_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_32_stride_0, update = linear_250_cast_fp16, x = coreml_update_state_125)[name = string("v_cache1_internal_tensor_assign_32_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_32_cast_fp16, input = v_cache1)[name = string("coreml_update_state_127_write_state")]; + int32 concat_692_values0_0 = const()[name = string("concat_692_values0_0"), val = int32(1)]; + int32 concat_692_values2_0 = const()[name = string("concat_692_values2_0"), val = int32(1280)]; + int32 concat_692_axis_0 = const()[name = string("concat_692_axis_0"), val = int32(0)]; + bool concat_692_interleave_0 = const()[name = string("concat_692_interleave_0"), val = bool(false)]; + tensor concat_692 = concat(axis = concat_692_axis_0, interleave = concat_692_interleave_0, values = (concat_692_values0_0, end_step, concat_692_values2_0))[name = string("concat_692")]; + tensor var_6777_begin_0 = const()[name = string("op_6777_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6777_end_mask_0 = const()[name = string("op_6777_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6777_cast_fp16 = slice_by_index(begin = var_6777_begin_0, end = concat_692, end_mask = var_6777_end_mask_0, x = k_cache_125_cast_fp16)[name = string("op_6777_cast_fp16")]; + tensor var_6780_begin_0 = const()[name = string("op_6780_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6780_end_mask_0 = const()[name = string("op_6780_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6780_cast_fp16 = slice_by_index(begin = var_6780_begin_0, end = concat_692, end_mask = var_6780_end_mask_0, x = v_cache_125_cast_fp16)[name = string("op_6780_cast_fp16")]; + tensor concat_694x = const()[name = string("concat_694x"), val = tensor([1, -1, 20, 64])]; + tensor var_6790_cast_fp16 = reshape(shape = concat_694x, x = linear_248_cast_fp16)[name = string("op_6790_cast_fp16")]; + tensor const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_251_cast_fp16 = mul(x = var_6790_cast_fp16, y = const_284_to_fp16)[name = string("q_251_cast_fp16")]; + tensor concat_695x = const()[name = string("concat_695x"), val = tensor([1, -1, 20, 64])]; + tensor var_6797_cast_fp16 = reshape(shape = concat_695x, x = var_6777_cast_fp16)[name = string("op_6797_cast_fp16")]; + tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_315_cast_fp16 = mul(x = var_6797_cast_fp16, y = const_285_to_fp16)[name = string("k_315_cast_fp16")]; + tensor concat_696x = const()[name = string("concat_696x"), val = tensor([1, -1, 20, 64])]; + tensor var_6804_cast_fp16 = reshape(shape = concat_696x, x = var_6780_cast_fp16)[name = string("op_6804_cast_fp16")]; + tensor var_6805 = const()[name = string("op_6805"), val = tensor([0, 2, 1, 3])]; + bool qk_187_transpose_x_0 = const()[name = string("qk_187_transpose_x_0"), val = bool(false)]; + bool qk_187_transpose_y_0 = const()[name = string("qk_187_transpose_y_0"), val = bool(false)]; + tensor transpose_381_perm_0 = const()[name = string("transpose_381_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_382_perm_0 = const()[name = string("transpose_382_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_382 = transpose(perm = transpose_382_perm_0, x = k_315_cast_fp16)[name = string("transpose_390")]; + tensor transpose_381 = transpose(perm = transpose_381_perm_0, x = q_251_cast_fp16)[name = string("transpose_391")]; + tensor qk_187_cast_fp16 = matmul(transpose_x = qk_187_transpose_x_0, transpose_y = qk_187_transpose_y_0, x = transpose_381, y = transpose_382)[name = string("qk_187_cast_fp16")]; + int32 concat_697_values1_0 = const()[name = string("concat_697_values1_0"), val = int32(448)]; + int32 concat_697_axis_0 = const()[name = string("concat_697_axis_0"), val = int32(0)]; + bool concat_697_interleave_0 = const()[name = string("concat_697_interleave_0"), val = bool(false)]; + tensor concat_697 = concat(axis = concat_697_axis_0, interleave = concat_697_interleave_0, values = (gather_374_cast_uint16_to_int32, concat_697_values1_0))[name = string("concat_697")]; + tensor var_6808_begin_0 = const()[name = string("op_6808_begin_0"), val = tensor([0, 0])]; + tensor var_6808_end_mask_0 = const()[name = string("op_6808_end_mask_0"), val = tensor([false, true])]; + tensor var_6808_cast_fp16 = slice_by_index(begin = var_6808_begin_0, end = concat_697, end_mask = var_6808_end_mask_0, x = mask_to_fp16)[name = string("op_6808_cast_fp16")]; + int32 concat_698_values0_0 = const()[name = string("concat_698_values0_0"), val = int32(0)]; + int32 concat_698_axis_0 = const()[name = string("concat_698_axis_0"), val = int32(0)]; + bool concat_698_interleave_0 = const()[name = string("concat_698_interleave_0"), val = bool(false)]; + tensor concat_698 = concat(axis = concat_698_axis_0, interleave = concat_698_interleave_0, values = (concat_698_values0_0, gather_374_cast_uint16_to_int32))[name = string("concat_698")]; + tensor var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor([0, 0])]; + tensor var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor([true, false])]; + tensor var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = concat_698, end_mask = var_6809_end_mask_0, x = var_6808_cast_fp16)[name = string("op_6809_cast_fp16")]; + tensor qk_189_cast_fp16 = add(x = qk_187_cast_fp16, y = var_6809_cast_fp16)[name = string("qk_189_cast_fp16")]; + tensor var_6812_cast_fp16 = softmax(axis = var_6721, x = qk_189_cast_fp16)[name = string("op_6812_cast_fp16")]; + bool var_6814_transpose_x_0 = const()[name = string("op_6814_transpose_x_0"), val = bool(false)]; + bool var_6814_transpose_y_0 = const()[name = string("op_6814_transpose_y_0"), val = bool(false)]; + tensor v_315_cast_fp16 = transpose(perm = var_6805, x = var_6804_cast_fp16)[name = string("transpose_392")]; + tensor var_6814_cast_fp16 = matmul(transpose_x = var_6814_transpose_x_0, transpose_y = var_6814_transpose_y_0, x = var_6812_cast_fp16, y = v_315_cast_fp16)[name = string("op_6814_cast_fp16")]; + tensor var_6815 = const()[name = string("op_6815"), val = tensor([0, 2, 1, 3])]; + tensor concat_699x = const()[name = string("concat_699x"), val = tensor([1, -1, 1280])]; + tensor var_6816_cast_fp16 = transpose(perm = var_6815, x = var_6814_cast_fp16)[name = string("transpose_389")]; + tensor x_565_cast_fp16 = reshape(shape = concat_699x, x = var_6816_cast_fp16)[name = string("x_565_cast_fp16")]; + tensor var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571449344)))]; + tensor var_6821_to_fp16 = const()[name = string("op_6821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574726208)))]; + tensor linear_251_cast_fp16 = linear(bias = var_6821_to_fp16, weight = var_6820_to_fp16, x = x_565_cast_fp16)[name = string("linear_251_cast_fp16")]; + tensor x_567_cast_fp16 = add(x = x_561_cast_fp16, y = linear_251_cast_fp16)[name = string("x_567_cast_fp16")]; + tensor var_6828_axes_0 = const()[name = string("op_6828_axes_0"), val = tensor([-1])]; + tensor blocks_31_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574728832)))]; + tensor blocks_31_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574731456)))]; + tensor var_6828_cast_fp16 = layer_norm(axes = var_6828_axes_0, beta = blocks_31_cross_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_cross_attn_ln_weight_to_fp16, x = x_567_cast_fp16)[name = string("op_6828_cast_fp16")]; + tensor var_6837_to_fp16 = const()[name = string("op_6837_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574734080)))]; + tensor var_6838_to_fp16 = const()[name = string("op_6838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578010944)))]; + tensor linear_252_cast_fp16 = linear(bias = var_6838_to_fp16, weight = var_6837_to_fp16, x = var_6828_cast_fp16)[name = string("linear_252_cast_fp16")]; + tensor concat_700 = const()[name = string("concat_700"), val = tensor([0, 0, 0])]; + tensor concat_701 = const()[name = string("concat_701"), val = tensor([0, 1500, 0])]; + tensor k_317_internal_tensor_assign_1_stride_0 = const()[name = string("k_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_700, begin_mask = k_317_internal_tensor_assign_1_begin_mask_0, end = concat_701, end_mask = k_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_317_internal_tensor_assign_1_squeeze_mask_0, stride = k_317_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_317_internal_tensor_assign_1_cast_fp16")]; + tensor concat_702 = const()[name = string("concat_702"), val = tensor([0, 0, 0])]; + tensor concat_703 = const()[name = string("concat_703"), val = tensor([0, 1500, 0])]; + tensor v_317_internal_tensor_assign_1_stride_0 = const()[name = string("v_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_702, begin_mask = v_317_internal_tensor_assign_1_begin_mask_0, end = concat_703, end_mask = v_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_317_internal_tensor_assign_1_squeeze_mask_0, stride = v_317_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_317_internal_tensor_assign_1_cast_fp16")]; + tensor concat_704x = const()[name = string("concat_704x"), val = tensor([1, -1, 20, 64])]; + tensor var_6858_cast_fp16 = reshape(shape = concat_704x, x = linear_252_cast_fp16)[name = string("op_6858_cast_fp16")]; + tensor const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_6858_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")]; + tensor var_6864 = const()[name = string("op_6864"), val = tensor([1, 1500, 20, -1])]; + tensor var_6865_cast_fp16 = reshape(shape = var_6864, x = k_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6865_cast_fp16")]; + tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_6865_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")]; + tensor var_6871 = const()[name = string("op_6871"), val = tensor([1, 1500, 20, -1])]; + tensor var_6872_cast_fp16 = reshape(shape = var_6871, x = v_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6872_cast_fp16")]; + tensor var_6873 = const()[name = string("op_6873"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_383_perm_0 = const()[name = string("transpose_383_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_384_perm_0 = const()[name = string("transpose_384_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_384 = transpose(perm = transpose_384_perm_0, x = k_cast_fp16)[name = string("transpose_386")]; + tensor transpose_383 = transpose(perm = transpose_383_perm_0, x = q_cast_fp16)[name = string("transpose_387")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_383, y = transpose_384)[name = string("qk_cast_fp16")]; + tensor var_6877_cast_fp16 = softmax(axis = var_6721, x = qk_cast_fp16)[name = string("op_6877_cast_fp16")]; + bool var_6879_transpose_x_0 = const()[name = string("op_6879_transpose_x_0"), val = bool(false)]; + bool var_6879_transpose_y_0 = const()[name = string("op_6879_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_6873, x = var_6872_cast_fp16)[name = string("transpose_388")]; + tensor var_6879_cast_fp16 = matmul(transpose_x = var_6879_transpose_x_0, transpose_y = var_6879_transpose_y_0, x = var_6877_cast_fp16, y = v_cast_fp16)[name = string("op_6879_cast_fp16")]; + tensor var_6880 = const()[name = string("op_6880"), val = tensor([0, 2, 1, 3])]; + tensor concat_705x = const()[name = string("concat_705x"), val = tensor([1, -1, 1280])]; + tensor var_6881_cast_fp16 = transpose(perm = var_6880, x = var_6879_cast_fp16)[name = string("transpose_385")]; + tensor x_571_cast_fp16 = reshape(shape = concat_705x, x = var_6881_cast_fp16)[name = string("x_571_cast_fp16")]; + tensor var_6885_to_fp16 = const()[name = string("op_6885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578013568)))]; + tensor var_6886_to_fp16 = const()[name = string("op_6886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581290432)))]; + tensor linear_253_cast_fp16 = linear(bias = var_6886_to_fp16, weight = var_6885_to_fp16, x = x_571_cast_fp16)[name = string("linear_253_cast_fp16")]; + tensor x_573_cast_fp16 = add(x = x_567_cast_fp16, y = linear_253_cast_fp16)[name = string("x_573_cast_fp16")]; + tensor var_6893_axes_0 = const()[name = string("op_6893_axes_0"), val = tensor([-1])]; + tensor blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581293056)))]; + tensor blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581295680)))]; + tensor var_6893_cast_fp16 = layer_norm(axes = var_6893_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_573_cast_fp16)[name = string("op_6893_cast_fp16")]; + tensor var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581298304)))]; + tensor var_6903_to_fp16 = const()[name = string("op_6903_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594405568)))]; + tensor linear_254_cast_fp16 = linear(bias = var_6903_to_fp16, weight = var_6902_to_fp16, x = var_6893_cast_fp16)[name = string("linear_254_cast_fp16")]; + string x_577_mode_0 = const()[name = string("x_577_mode_0"), val = string("EXACT")]; + tensor x_577_cast_fp16 = gelu(mode = x_577_mode_0, x = linear_254_cast_fp16)[name = string("x_577_cast_fp16")]; + tensor var_6908_to_fp16 = const()[name = string("op_6908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594415872)))]; + tensor var_6909_to_fp16 = const()[name = string("op_6909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607523136)))]; + tensor linear_255_cast_fp16 = linear(bias = var_6909_to_fp16, weight = var_6908_to_fp16, x = x_577_cast_fp16)[name = string("linear_255_cast_fp16")]; + tensor x_579_cast_fp16 = add(x = x_573_cast_fp16, y = linear_255_cast_fp16)[name = string("x_579_cast_fp16")]; + tensor var_6922_axes_0 = const()[name = string("op_6922_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607525760)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607528384)))]; + fp16 var_6913_to_fp16 = const()[name = string("op_6913_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6922_cast_fp16 = layer_norm(axes = var_6922_axes_0, beta = ln_bias_to_fp16, epsilon = var_6913_to_fp16, gamma = ln_weight_to_fp16, x = x_579_cast_fp16)[name = string("op_6922_cast_fp16")]; + tensor var_6932_bias_0_to_fp16 = const()[name = string("op_6932_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607531008)))]; + tensor logits = linear(bias = var_6932_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_6922_cast_fp16)[name = string("op_6932_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/large-v2/decoder_second.mlmodelc/weights/weight.bin b/large-v2/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..e54c5f6f71d4b2bd99cc6c673ad772a28e036400 --- /dev/null +++ b/large-v2/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6ddb1f8892bfddf9f96b11c6b596934c1fe6c01839f81c06d8d2e094f19533 +size 1607634802 diff --git a/large-v2/encoder.mlmodelc/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..be0828391dbe6d138ead9a5b8d3e876c437b9338 --- /dev/null +++ b/large-v2/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf4666de503da73cee5379b4dfa17d4a3f06bff3c0c8e310d0a0e1cf2554f87 +size 202 diff --git a/large-v2/encoder.mlmodelc/coremldata.bin b/large-v2/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..11af75965ddba955d827d601c8bc9c740eccffef --- /dev/null +++ b/large-v2/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2ae6d3a42a2ca10398635e0b210846dcbc24a31184c93f9302694163bcadaf +size 196 diff --git a/large-v2/encoder.mlmodelc/metadata.json b/large-v2/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fc7e22714a55ee93a6218c260ec1c188f61696bc --- /dev/null +++ b/large-v2/encoder.mlmodelc/metadata.json @@ -0,0 +1,76 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[1, 1500, 1280]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.add" : 65, + "Ios18.reshape" : 128, + "Ios18.linear" : 192, + "Ios18.gelu" : 34, + "Ios18.matmul" : 64, + "Ios18.transpose" : 129, + "Ios18.layerNorm" : 65, + "Ios18.conv" : 2, + "Ios18.cast" : 4, + "Ios18.softmax" : 32, + "Ios18.mul" : 64 + }, + "computePrecision" : "Mixed (Float16, Float32, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_pipeline", + "structure" : [ + { + "name" : "MLModelType_mlProgram" + }, + { + "name" : "MLModelType_mlProgram" + } + ] + }, + "userDefinedMetadata" : { + + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "chunked_pipeline", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model0/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a +size 108 diff --git a/large-v2/encoder.mlmodelc/model0/coremldata.bin b/large-v2/encoder.mlmodelc/model0/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..7cff6275c851901c70c7730d7570ee9c01919f76 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model0/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a30736ebb8641d231fc84aa2d3d05770adb9603bdca174d439416450827b75a +size 200 diff --git a/large-v2/encoder.mlmodelc/model0/model.mil b/large-v2/encoder.mlmodelc/model0/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..dd22eae1181ca157d6dd8f1273f2dcb4977c1e88 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model0/model.mil @@ -0,0 +1,962 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor logmel_data) { + string var_84_pad_type_0 = const()[name = string("op_84_pad_type_0"), val = string("custom")]; + tensor var_84_pad_0 = const()[name = string("op_84_pad_0"), val = tensor([1, 1])]; + tensor var_84_strides_0 = const()[name = string("op_84_strides_0"), val = tensor([1])]; + tensor var_84_dilations_0 = const()[name = string("op_84_dilations_0"), val = tensor([1])]; + int32 var_84_groups_0 = const()[name = string("op_84_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614528)))]; + tensor var_84_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_84_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_102_pad_type_0 = const()[name = string("op_102_pad_type_0"), val = string("custom")]; + tensor var_102_pad_0 = const()[name = string("op_102_pad_0"), val = tensor([1, 1])]; + tensor var_102_strides_0 = const()[name = string("op_102_strides_0"), val = tensor([2])]; + tensor var_102_dilations_0 = const()[name = string("op_102_dilations_0"), val = tensor([1])]; + int32 var_102_groups_0 = const()[name = string("op_102_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617152)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10447616)))]; + tensor var_102_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_102_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_108 = const()[name = string("op_108"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10450240)))]; + tensor x_5_cast_fp16 = transpose(perm = var_108, x = x_3_cast_fp16)[name = string("transpose_160")]; + tensor var_111_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_111_cast_fp16")]; + int32 var_124 = const()[name = string("op_124"), val = int32(-1)]; + tensor var_140_axes_0 = const()[name = string("op_140_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14290304)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14292928)))]; + fp16 var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_140_cast_fp16 = layer_norm(axes = var_140_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_111_cast_fp16)[name = string("op_140_cast_fp16")]; + tensor var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14295552)))]; + tensor var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17572416)))]; + tensor linear_0_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = var_140_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17575040)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(20851904)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_155_to_fp16, x = var_140_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(20854528)))]; + tensor var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24131392)))]; + tensor linear_2_cast_fp16 = linear(bias = var_160_to_fp16, weight = var_159_to_fp16, x = var_140_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_168 = const()[name = string("op_168"), val = tensor([1, 1500, 20, -1])]; + tensor var_169_cast_fp16 = reshape(shape = var_168, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")]; + tensor const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_224_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_175 = const()[name = string("op_175"), val = tensor([1, 1500, 20, -1])]; + tensor var_176_cast_fp16 = reshape(shape = var_175, x = linear_1_cast_fp16)[name = string("op_176_cast_fp16")]; + tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_176_cast_fp16, y = const_225_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_182 = const()[name = string("op_182"), val = tensor([1, 1500, 20, -1])]; + tensor var_183_cast_fp16 = reshape(shape = var_182, x = linear_2_cast_fp16)[name = string("op_183_cast_fp16")]; + tensor var_184 = const()[name = string("op_184"), val = tensor([0, 2, -3, -1])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = k_3_cast_fp16)[name = string("transpose_158")]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = q_3_cast_fp16)[name = string("transpose_159")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_1_cast_fp16")]; + tensor var_188_cast_fp16 = softmax(axis = var_124, x = qk_1_cast_fp16)[name = string("op_188_cast_fp16")]; + bool var_190_transpose_x_0 = const()[name = string("op_190_transpose_x_0"), val = bool(false)]; + bool var_190_transpose_y_0 = const()[name = string("op_190_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_157")]; + tensor var_190_cast_fp16 = matmul(transpose_x = var_190_transpose_x_0, transpose_y = var_190_transpose_y_0, x = var_188_cast_fp16, y = v_3_cast_fp16)[name = string("op_190_cast_fp16")]; + tensor var_191 = const()[name = string("op_191"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 1280])]; + tensor var_192_cast_fp16 = transpose(perm = var_191, x = var_190_cast_fp16)[name = string("transpose_156")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_192_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24134016)))]; + tensor var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27410880)))]; + tensor linear_3_cast_fp16 = linear(bias = var_197_to_fp16, weight = var_196_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_111_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_204_axes_0 = const()[name = string("op_204_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27413504)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27416128)))]; + tensor var_204_cast_fp16 = layer_norm(axes = var_204_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27418752)))]; + tensor var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40526016)))]; + tensor linear_4_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_204_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40536320)))]; + tensor var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53643584)))]; + tensor linear_5_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-1)]; + tensor var_246_axes_0 = const()[name = string("op_246_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53646208)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53648832)))]; + fp16 var_236_to_fp16 = const()[name = string("op_236_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_246_cast_fp16 = layer_norm(axes = var_246_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_246_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(53651456)))]; + tensor var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(56928320)))]; + tensor linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_246_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(56930944)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_261_to_fp16, x = var_246_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(60207808)))]; + tensor var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63484672)))]; + tensor linear_8_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = var_246_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_274 = const()[name = string("op_274"), val = tensor([1, 1500, 20, -1])]; + tensor var_275_cast_fp16 = reshape(shape = var_274, x = linear_6_cast_fp16)[name = string("op_275_cast_fp16")]; + tensor const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_275_cast_fp16, y = const_226_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_281 = const()[name = string("op_281"), val = tensor([1, 1500, 20, -1])]; + tensor var_282_cast_fp16 = reshape(shape = var_281, x = linear_7_cast_fp16)[name = string("op_282_cast_fp16")]; + tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_282_cast_fp16, y = const_227_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_288 = const()[name = string("op_288"), val = tensor([1, 1500, 20, -1])]; + tensor var_289_cast_fp16 = reshape(shape = var_288, x = linear_8_cast_fp16)[name = string("op_289_cast_fp16")]; + tensor var_290 = const()[name = string("op_290"), val = tensor([0, 2, -3, -1])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = k_7_cast_fp16)[name = string("transpose_154")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = q_7_cast_fp16)[name = string("transpose_155")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_3_cast_fp16")]; + tensor var_294_cast_fp16 = softmax(axis = var_230, x = qk_3_cast_fp16)[name = string("op_294_cast_fp16")]; + bool var_296_transpose_x_0 = const()[name = string("op_296_transpose_x_0"), val = bool(false)]; + bool var_296_transpose_y_0 = const()[name = string("op_296_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_290, x = var_289_cast_fp16)[name = string("transpose_153")]; + tensor var_296_cast_fp16 = matmul(transpose_x = var_296_transpose_x_0, transpose_y = var_296_transpose_y_0, x = var_294_cast_fp16, y = v_7_cast_fp16)[name = string("op_296_cast_fp16")]; + tensor var_297 = const()[name = string("op_297"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 1280])]; + tensor var_298_cast_fp16 = transpose(perm = var_297, x = var_296_cast_fp16)[name = string("transpose_152")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_298_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63487296)))]; + tensor var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66764160)))]; + tensor linear_9_cast_fp16 = linear(bias = var_303_to_fp16, weight = var_302_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_310_axes_0 = const()[name = string("op_310_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66766784)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66769408)))]; + tensor var_310_cast_fp16 = layer_norm(axes = var_310_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_310_cast_fp16")]; + tensor var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(66772032)))]; + tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(79879296)))]; + tensor linear_10_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_310_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(79889600)))]; + tensor var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(92996864)))]; + tensor linear_11_cast_fp16 = linear(bias = var_326_to_fp16, weight = var_325_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_336 = const()[name = string("op_336"), val = int32(-1)]; + tensor var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(92999488)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93002112)))]; + fp16 var_342_to_fp16 = const()[name = string("op_342_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_352_cast_fp16")]; + tensor var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93004736)))]; + tensor var_364_to_fp16 = const()[name = string("op_364_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96281600)))]; + tensor linear_12_cast_fp16 = linear(bias = var_364_to_fp16, weight = var_363_to_fp16, x = var_352_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96284224)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_367_to_fp16, x = var_352_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = string("op_371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(99561088)))]; + tensor var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(102837952)))]; + tensor linear_14_cast_fp16 = linear(bias = var_372_to_fp16, weight = var_371_to_fp16, x = var_352_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_380 = const()[name = string("op_380"), val = tensor([1, 1500, 20, -1])]; + tensor var_381_cast_fp16 = reshape(shape = var_380, x = linear_12_cast_fp16)[name = string("op_381_cast_fp16")]; + tensor const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_381_cast_fp16, y = const_228_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_387 = const()[name = string("op_387"), val = tensor([1, 1500, 20, -1])]; + tensor var_388_cast_fp16 = reshape(shape = var_387, x = linear_13_cast_fp16)[name = string("op_388_cast_fp16")]; + tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_388_cast_fp16, y = const_229_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_394 = const()[name = string("op_394"), val = tensor([1, 1500, 20, -1])]; + tensor var_395_cast_fp16 = reshape(shape = var_394, x = linear_14_cast_fp16)[name = string("op_395_cast_fp16")]; + tensor var_396 = const()[name = string("op_396"), val = tensor([0, 2, -3, -1])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = k_11_cast_fp16)[name = string("transpose_150")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = q_11_cast_fp16)[name = string("transpose_151")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_5_cast_fp16")]; + tensor var_400_cast_fp16 = softmax(axis = var_336, x = qk_5_cast_fp16)[name = string("op_400_cast_fp16")]; + bool var_402_transpose_x_0 = const()[name = string("op_402_transpose_x_0"), val = bool(false)]; + bool var_402_transpose_y_0 = const()[name = string("op_402_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_396, x = var_395_cast_fp16)[name = string("transpose_149")]; + tensor var_402_cast_fp16 = matmul(transpose_x = var_402_transpose_x_0, transpose_y = var_402_transpose_y_0, x = var_400_cast_fp16, y = v_11_cast_fp16)[name = string("op_402_cast_fp16")]; + tensor var_403 = const()[name = string("op_403"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 1280])]; + tensor var_404_cast_fp16 = transpose(perm = var_403, x = var_402_cast_fp16)[name = string("transpose_148")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_404_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(102840576)))]; + tensor var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106117440)))]; + tensor linear_15_cast_fp16 = linear(bias = var_409_to_fp16, weight = var_408_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_416_axes_0 = const()[name = string("op_416_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106120064)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106122688)))]; + tensor var_416_cast_fp16 = layer_norm(axes = var_416_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_416_cast_fp16")]; + tensor var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106125312)))]; + tensor var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119232576)))]; + tensor linear_16_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_416_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119242880)))]; + tensor var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132350144)))]; + tensor linear_17_cast_fp16 = linear(bias = var_432_to_fp16, weight = var_431_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_442 = const()[name = string("op_442"), val = int32(-1)]; + tensor var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132352768)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132355392)))]; + fp16 var_448_to_fp16 = const()[name = string("op_448_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_458_cast_fp16 = layer_norm(axes = var_458_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_458_cast_fp16")]; + tensor var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132358016)))]; + tensor var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(135634880)))]; + tensor linear_18_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = var_458_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = string("op_473_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(135637504)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_473_to_fp16, x = var_458_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(138914368)))]; + tensor var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142191232)))]; + tensor linear_20_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = var_458_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_486 = const()[name = string("op_486"), val = tensor([1, 1500, 20, -1])]; + tensor var_487_cast_fp16 = reshape(shape = var_486, x = linear_18_cast_fp16)[name = string("op_487_cast_fp16")]; + tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_487_cast_fp16, y = const_230_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_493 = const()[name = string("op_493"), val = tensor([1, 1500, 20, -1])]; + tensor var_494_cast_fp16 = reshape(shape = var_493, x = linear_19_cast_fp16)[name = string("op_494_cast_fp16")]; + tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_494_cast_fp16, y = const_231_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_500 = const()[name = string("op_500"), val = tensor([1, 1500, 20, -1])]; + tensor var_501_cast_fp16 = reshape(shape = var_500, x = linear_20_cast_fp16)[name = string("op_501_cast_fp16")]; + tensor var_502 = const()[name = string("op_502"), val = tensor([0, 2, -3, -1])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = k_15_cast_fp16)[name = string("transpose_146")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = q_15_cast_fp16)[name = string("transpose_147")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_7_cast_fp16")]; + tensor var_506_cast_fp16 = softmax(axis = var_442, x = qk_7_cast_fp16)[name = string("op_506_cast_fp16")]; + bool var_508_transpose_x_0 = const()[name = string("op_508_transpose_x_0"), val = bool(false)]; + bool var_508_transpose_y_0 = const()[name = string("op_508_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = string("transpose_145")]; + tensor var_508_cast_fp16 = matmul(transpose_x = var_508_transpose_x_0, transpose_y = var_508_transpose_y_0, x = var_506_cast_fp16, y = v_15_cast_fp16)[name = string("op_508_cast_fp16")]; + tensor var_509 = const()[name = string("op_509"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 1280])]; + tensor var_510_cast_fp16 = transpose(perm = var_509, x = var_508_cast_fp16)[name = string("transpose_144")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_510_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142193856)))]; + tensor var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145470720)))]; + tensor linear_21_cast_fp16 = linear(bias = var_515_to_fp16, weight = var_514_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_522_axes_0 = const()[name = string("op_522_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145473344)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145475968)))]; + tensor var_522_cast_fp16 = layer_norm(axes = var_522_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_522_cast_fp16")]; + tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145478592)))]; + tensor var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158585856)))]; + tensor linear_22_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_522_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158596160)))]; + tensor var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171703424)))]; + tensor linear_23_cast_fp16 = linear(bias = var_538_to_fp16, weight = var_537_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")]; + int32 var_548 = const()[name = string("op_548"), val = int32(-1)]; + tensor var_564_axes_0 = const()[name = string("op_564_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171706048)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171708672)))]; + fp16 var_554_to_fp16 = const()[name = string("op_554_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_564_cast_fp16 = layer_norm(axes = var_564_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_564_cast_fp16")]; + tensor var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(171711296)))]; + tensor var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(174988160)))]; + tensor linear_24_cast_fp16 = linear(bias = var_576_to_fp16, weight = var_575_to_fp16, x = var_564_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(174990784)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_579_to_fp16, x = var_564_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_583_to_fp16 = const()[name = string("op_583_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(178267648)))]; + tensor var_584_to_fp16 = const()[name = string("op_584_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181544512)))]; + tensor linear_26_cast_fp16 = linear(bias = var_584_to_fp16, weight = var_583_to_fp16, x = var_564_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_592 = const()[name = string("op_592"), val = tensor([1, 1500, 20, -1])]; + tensor var_593_cast_fp16 = reshape(shape = var_592, x = linear_24_cast_fp16)[name = string("op_593_cast_fp16")]; + tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_593_cast_fp16, y = const_232_to_fp16)[name = string("q_19_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([1, 1500, 20, -1])]; + tensor var_600_cast_fp16 = reshape(shape = var_599, x = linear_25_cast_fp16)[name = string("op_600_cast_fp16")]; + tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_600_cast_fp16, y = const_233_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_606 = const()[name = string("op_606"), val = tensor([1, 1500, 20, -1])]; + tensor var_607_cast_fp16 = reshape(shape = var_606, x = linear_26_cast_fp16)[name = string("op_607_cast_fp16")]; + tensor var_608 = const()[name = string("op_608"), val = tensor([0, 2, -3, -1])]; + bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)]; + bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = k_19_cast_fp16)[name = string("transpose_142")]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = q_19_cast_fp16)[name = string("transpose_143")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_9_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_548, x = qk_9_cast_fp16)[name = string("op_612_cast_fp16")]; + bool var_614_transpose_x_0 = const()[name = string("op_614_transpose_x_0"), val = bool(false)]; + bool var_614_transpose_y_0 = const()[name = string("op_614_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_608, x = var_607_cast_fp16)[name = string("transpose_141")]; + tensor var_614_cast_fp16 = matmul(transpose_x = var_614_transpose_x_0, transpose_y = var_614_transpose_y_0, x = var_612_cast_fp16, y = v_19_cast_fp16)[name = string("op_614_cast_fp16")]; + tensor var_615 = const()[name = string("op_615"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 1500, 1280])]; + tensor var_616_cast_fp16 = transpose(perm = var_615, x = var_614_cast_fp16)[name = string("transpose_140")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = var_616_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181547136)))]; + tensor var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184824000)))]; + tensor linear_27_cast_fp16 = linear(bias = var_621_to_fp16, weight = var_620_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_628_axes_0 = const()[name = string("op_628_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184826624)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184829248)))]; + tensor var_628_cast_fp16 = layer_norm(axes = var_628_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_628_cast_fp16")]; + tensor var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(184831872)))]; + tensor var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(197939136)))]; + tensor linear_28_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_628_cast_fp16)[name = string("linear_28_cast_fp16")]; + string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = string("op_643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(197949440)))]; + tensor var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211056704)))]; + tensor linear_29_cast_fp16 = linear(bias = var_644_to_fp16, weight = var_643_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")]; + int32 var_654 = const()[name = string("op_654"), val = int32(-1)]; + tensor var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211059328)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211061952)))]; + fp16 var_660_to_fp16 = const()[name = string("op_660_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_670_cast_fp16")]; + tensor var_681_to_fp16 = const()[name = string("op_681_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211064576)))]; + tensor var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214341440)))]; + tensor linear_30_cast_fp16 = linear(bias = var_682_to_fp16, weight = var_681_to_fp16, x = var_670_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214344064)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_685_to_fp16, x = var_670_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(217620928)))]; + tensor var_690_to_fp16 = const()[name = string("op_690_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(220897792)))]; + tensor linear_32_cast_fp16 = linear(bias = var_690_to_fp16, weight = var_689_to_fp16, x = var_670_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([1, 1500, 20, -1])]; + tensor var_699_cast_fp16 = reshape(shape = var_698, x = linear_30_cast_fp16)[name = string("op_699_cast_fp16")]; + tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_699_cast_fp16, y = const_234_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_705 = const()[name = string("op_705"), val = tensor([1, 1500, 20, -1])]; + tensor var_706_cast_fp16 = reshape(shape = var_705, x = linear_31_cast_fp16)[name = string("op_706_cast_fp16")]; + tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_23_cast_fp16 = mul(x = var_706_cast_fp16, y = const_235_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_712 = const()[name = string("op_712"), val = tensor([1, 1500, 20, -1])]; + tensor var_713_cast_fp16 = reshape(shape = var_712, x = linear_32_cast_fp16)[name = string("op_713_cast_fp16")]; + tensor var_714 = const()[name = string("op_714"), val = tensor([0, 2, -3, -1])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = k_23_cast_fp16)[name = string("transpose_138")]; + tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = q_23_cast_fp16)[name = string("transpose_139")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_11_cast_fp16")]; + tensor var_718_cast_fp16 = softmax(axis = var_654, x = qk_11_cast_fp16)[name = string("op_718_cast_fp16")]; + bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)]; + bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)]; + tensor v_23_cast_fp16 = transpose(perm = var_714, x = var_713_cast_fp16)[name = string("transpose_137")]; + tensor var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = var_718_cast_fp16, y = v_23_cast_fp16)[name = string("op_720_cast_fp16")]; + tensor var_721 = const()[name = string("op_721"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([1, 1500, 1280])]; + tensor var_722_cast_fp16 = transpose(perm = var_721, x = var_720_cast_fp16)[name = string("transpose_136")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = var_722_cast_fp16)[name = string("x_71_cast_fp16")]; + tensor var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(220900416)))]; + tensor var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224177280)))]; + tensor linear_33_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_734_axes_0 = const()[name = string("op_734_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224179904)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224182528)))]; + tensor var_734_cast_fp16 = layer_norm(axes = var_734_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_734_cast_fp16")]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224185152)))]; + tensor var_744_to_fp16 = const()[name = string("op_744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237292416)))]; + tensor linear_34_cast_fp16 = linear(bias = var_744_to_fp16, weight = var_743_to_fp16, x = var_734_cast_fp16)[name = string("linear_34_cast_fp16")]; + string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")]; + tensor var_749_to_fp16 = const()[name = string("op_749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237302720)))]; + tensor var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250409984)))]; + tensor linear_35_cast_fp16 = linear(bias = var_750_to_fp16, weight = var_749_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_760 = const()[name = string("op_760"), val = int32(-1)]; + tensor var_776_axes_0 = const()[name = string("op_776_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250412608)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250415232)))]; + fp16 var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_776_cast_fp16 = layer_norm(axes = var_776_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_776_cast_fp16")]; + tensor var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250417856)))]; + tensor var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(253694720)))]; + tensor linear_36_cast_fp16 = linear(bias = var_788_to_fp16, weight = var_787_to_fp16, x = var_776_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor var_791_to_fp16 = const()[name = string("op_791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(253697344)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_791_to_fp16, x = var_776_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(256974208)))]; + tensor var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260251072)))]; + tensor linear_38_cast_fp16 = linear(bias = var_796_to_fp16, weight = var_795_to_fp16, x = var_776_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor var_804 = const()[name = string("op_804"), val = tensor([1, 1500, 20, -1])]; + tensor var_805_cast_fp16 = reshape(shape = var_804, x = linear_36_cast_fp16)[name = string("op_805_cast_fp16")]; + tensor const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_805_cast_fp16, y = const_236_to_fp16)[name = string("q_27_cast_fp16")]; + tensor var_811 = const()[name = string("op_811"), val = tensor([1, 1500, 20, -1])]; + tensor var_812_cast_fp16 = reshape(shape = var_811, x = linear_37_cast_fp16)[name = string("op_812_cast_fp16")]; + tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_27_cast_fp16 = mul(x = var_812_cast_fp16, y = const_237_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_818 = const()[name = string("op_818"), val = tensor([1, 1500, 20, -1])]; + tensor var_819_cast_fp16 = reshape(shape = var_818, x = linear_38_cast_fp16)[name = string("op_819_cast_fp16")]; + tensor var_820 = const()[name = string("op_820"), val = tensor([0, 2, -3, -1])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = k_27_cast_fp16)[name = string("transpose_134")]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = q_27_cast_fp16)[name = string("transpose_135")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_13_cast_fp16")]; + tensor var_824_cast_fp16 = softmax(axis = var_760, x = qk_13_cast_fp16)[name = string("op_824_cast_fp16")]; + bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)]; + bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)]; + tensor v_27_cast_fp16 = transpose(perm = var_820, x = var_819_cast_fp16)[name = string("transpose_133")]; + tensor var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_27_cast_fp16)[name = string("op_826_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([0, 2, 1, 3])]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([1, 1500, 1280])]; + tensor var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_132")]; + tensor x_83_cast_fp16 = reshape(shape = concat_6, x = var_828_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260253696)))]; + tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263530560)))]; + tensor linear_39_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263533184)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263535808)))]; + tensor var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263538432)))]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(276645696)))]; + tensor linear_40_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_40_cast_fp16")]; + string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")]; + tensor x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_855_to_fp16 = const()[name = string("op_855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(276656000)))]; + tensor var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289763264)))]; + tensor linear_41_cast_fp16 = linear(bias = var_856_to_fp16, weight = var_855_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")]; + int32 var_866 = const()[name = string("op_866"), val = int32(-1)]; + tensor var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289765888)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289768512)))]; + fp16 var_872_to_fp16 = const()[name = string("op_872_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_882_cast_fp16 = layer_norm(axes = var_882_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_882_cast_fp16")]; + tensor var_893_to_fp16 = const()[name = string("op_893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(289771136)))]; + tensor var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293048000)))]; + tensor linear_42_cast_fp16 = linear(bias = var_894_to_fp16, weight = var_893_to_fp16, x = var_882_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293050624)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_897_to_fp16, x = var_882_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(296327488)))]; + tensor var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299604352)))]; + tensor linear_44_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = var_882_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor var_910 = const()[name = string("op_910"), val = tensor([1, 1500, 20, -1])]; + tensor var_911_cast_fp16 = reshape(shape = var_910, x = linear_42_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_911_cast_fp16, y = const_238_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_917 = const()[name = string("op_917"), val = tensor([1, 1500, 20, -1])]; + tensor var_918_cast_fp16 = reshape(shape = var_917, x = linear_43_cast_fp16)[name = string("op_918_cast_fp16")]; + tensor const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_239_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([1, 1500, 20, -1])]; + tensor var_925_cast_fp16 = reshape(shape = var_924, x = linear_44_cast_fp16)[name = string("op_925_cast_fp16")]; + tensor var_926 = const()[name = string("op_926"), val = tensor([0, 2, -3, -1])]; + bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)]; + bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = k_31_cast_fp16)[name = string("transpose_130")]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = q_31_cast_fp16)[name = string("transpose_131")]; + tensor qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_15_cast_fp16")]; + tensor var_930_cast_fp16 = softmax(axis = var_866, x = qk_15_cast_fp16)[name = string("op_930_cast_fp16")]; + bool var_932_transpose_x_0 = const()[name = string("op_932_transpose_x_0"), val = bool(false)]; + bool var_932_transpose_y_0 = const()[name = string("op_932_transpose_y_0"), val = bool(false)]; + tensor v_31_cast_fp16 = transpose(perm = var_926, x = var_925_cast_fp16)[name = string("transpose_129")]; + tensor var_932_cast_fp16 = matmul(transpose_x = var_932_transpose_x_0, transpose_y = var_932_transpose_y_0, x = var_930_cast_fp16, y = v_31_cast_fp16)[name = string("op_932_cast_fp16")]; + tensor var_933 = const()[name = string("op_933"), val = tensor([0, 2, 1, 3])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([1, 1500, 1280])]; + tensor var_934_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_128")]; + tensor x_95_cast_fp16 = reshape(shape = concat_7, x = var_934_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299606976)))]; + tensor var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302883840)))]; + tensor linear_45_cast_fp16 = linear(bias = var_939_to_fp16, weight = var_938_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_946_axes_0 = const()[name = string("op_946_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302886464)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302889088)))]; + tensor var_946_cast_fp16 = layer_norm(axes = var_946_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_946_cast_fp16")]; + tensor var_955_to_fp16 = const()[name = string("op_955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(302891712)))]; + tensor var_956_to_fp16 = const()[name = string("op_956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(315998976)))]; + tensor linear_46_cast_fp16 = linear(bias = var_956_to_fp16, weight = var_955_to_fp16, x = var_946_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")]; + tensor x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_961_to_fp16 = const()[name = string("op_961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316009280)))]; + tensor var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329116544)))]; + tensor linear_47_cast_fp16 = linear(bias = var_962_to_fp16, weight = var_961_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")]; + int32 var_972 = const()[name = string("op_972"), val = int32(-1)]; + tensor var_988_axes_0 = const()[name = string("op_988_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329119168)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329121792)))]; + fp16 var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_988_cast_fp16 = layer_norm(axes = var_988_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_988_cast_fp16")]; + tensor var_999_to_fp16 = const()[name = string("op_999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329124416)))]; + tensor var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332401280)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1000_to_fp16, weight = var_999_to_fp16, x = var_988_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332403904)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1003_to_fp16, x = var_988_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(335680768)))]; + tensor var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(338957632)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1008_to_fp16, weight = var_1007_to_fp16, x = var_988_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([1, 1500, 20, -1])]; + tensor var_1017_cast_fp16 = reshape(shape = var_1016, x = linear_48_cast_fp16)[name = string("op_1017_cast_fp16")]; + tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1017_cast_fp16, y = const_240_to_fp16)[name = string("q_35_cast_fp16")]; + tensor var_1023 = const()[name = string("op_1023"), val = tensor([1, 1500, 20, -1])]; + tensor var_1024_cast_fp16 = reshape(shape = var_1023, x = linear_49_cast_fp16)[name = string("op_1024_cast_fp16")]; + tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_1024_cast_fp16, y = const_241_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_1030 = const()[name = string("op_1030"), val = tensor([1, 1500, 20, -1])]; + tensor var_1031_cast_fp16 = reshape(shape = var_1030, x = linear_50_cast_fp16)[name = string("op_1031_cast_fp16")]; + tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, -3, -1])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = k_35_cast_fp16)[name = string("transpose_126")]; + tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = q_35_cast_fp16)[name = string("transpose_127")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_17_cast_fp16")]; + tensor var_1036_cast_fp16 = softmax(axis = var_972, x = qk_17_cast_fp16)[name = string("op_1036_cast_fp16")]; + bool var_1038_transpose_x_0 = const()[name = string("op_1038_transpose_x_0"), val = bool(false)]; + bool var_1038_transpose_y_0 = const()[name = string("op_1038_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_1032, x = var_1031_cast_fp16)[name = string("transpose_125")]; + tensor var_1038_cast_fp16 = matmul(transpose_x = var_1038_transpose_x_0, transpose_y = var_1038_transpose_y_0, x = var_1036_cast_fp16, y = v_35_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039 = const()[name = string("op_1039"), val = tensor([0, 2, 1, 3])]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 1500, 1280])]; + tensor var_1040_cast_fp16 = transpose(perm = var_1039, x = var_1038_cast_fp16)[name = string("transpose_124")]; + tensor x_107_cast_fp16 = reshape(shape = concat_8, x = var_1040_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(338960256)))]; + tensor var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342237120)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1045_to_fp16, weight = var_1044_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1052_axes_0 = const()[name = string("op_1052_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342239744)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342242368)))]; + tensor var_1052_cast_fp16 = layer_norm(axes = var_1052_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1052_cast_fp16")]; + tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342244992)))]; + tensor var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355352256)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1052_cast_fp16)[name = string("linear_52_cast_fp16")]; + string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")]; + tensor x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")]; + tensor var_1067_to_fp16 = const()[name = string("op_1067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355362560)))]; + tensor var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368469824)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1068_to_fp16, weight = var_1067_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")]; + int32 var_1078 = const()[name = string("op_1078"), val = int32(-1)]; + tensor var_1094_axes_0 = const()[name = string("op_1094_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368472448)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368475072)))]; + fp16 var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1094_cast_fp16 = layer_norm(axes = var_1094_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor var_1105_to_fp16 = const()[name = string("op_1105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368477696)))]; + tensor var_1106_to_fp16 = const()[name = string("op_1106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(371754560)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1106_to_fp16, weight = var_1105_to_fp16, x = var_1094_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(371757184)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1109_to_fp16, x = var_1094_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor var_1113_to_fp16 = const()[name = string("op_1113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(375034048)))]; + tensor var_1114_to_fp16 = const()[name = string("op_1114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378310912)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1114_to_fp16, weight = var_1113_to_fp16, x = var_1094_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1122 = const()[name = string("op_1122"), val = tensor([1, 1500, 20, -1])]; + tensor var_1123_cast_fp16 = reshape(shape = var_1122, x = linear_54_cast_fp16)[name = string("op_1123_cast_fp16")]; + tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1123_cast_fp16, y = const_242_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([1, 1500, 20, -1])]; + tensor var_1130_cast_fp16 = reshape(shape = var_1129, x = linear_55_cast_fp16)[name = string("op_1130_cast_fp16")]; + tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_1130_cast_fp16, y = const_243_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_1136 = const()[name = string("op_1136"), val = tensor([1, 1500, 20, -1])]; + tensor var_1137_cast_fp16 = reshape(shape = var_1136, x = linear_56_cast_fp16)[name = string("op_1137_cast_fp16")]; + tensor var_1138 = const()[name = string("op_1138"), val = tensor([0, 2, -3, -1])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = k_39_cast_fp16)[name = string("transpose_122")]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = q_39_cast_fp16)[name = string("transpose_123")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_19_cast_fp16")]; + tensor var_1142_cast_fp16 = softmax(axis = var_1078, x = qk_19_cast_fp16)[name = string("op_1142_cast_fp16")]; + bool var_1144_transpose_x_0 = const()[name = string("op_1144_transpose_x_0"), val = bool(false)]; + bool var_1144_transpose_y_0 = const()[name = string("op_1144_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_1138, x = var_1137_cast_fp16)[name = string("transpose_121")]; + tensor var_1144_cast_fp16 = matmul(transpose_x = var_1144_transpose_x_0, transpose_y = var_1144_transpose_y_0, x = var_1142_cast_fp16, y = v_39_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor var_1145 = const()[name = string("op_1145"), val = tensor([0, 2, 1, 3])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([1, 1500, 1280])]; + tensor var_1146_cast_fp16 = transpose(perm = var_1145, x = var_1144_cast_fp16)[name = string("transpose_120")]; + tensor x_119_cast_fp16 = reshape(shape = concat_9, x = var_1146_cast_fp16)[name = string("x_119_cast_fp16")]; + tensor var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378313536)))]; + tensor var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381590400)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1151_to_fp16, weight = var_1150_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1158_axes_0 = const()[name = string("op_1158_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381593024)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381595648)))]; + tensor var_1158_cast_fp16 = layer_norm(axes = var_1158_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1158_cast_fp16")]; + tensor var_1167_to_fp16 = const()[name = string("op_1167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381598272)))]; + tensor var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(394705536)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1168_to_fp16, weight = var_1167_to_fp16, x = var_1158_cast_fp16)[name = string("linear_58_cast_fp16")]; + string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")]; + tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(394715840)))]; + tensor var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407823104)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")]; + int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)]; + tensor var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407825728)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407828352)))]; + fp16 var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1200_cast_fp16 = layer_norm(axes = var_1200_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1200_cast_fp16")]; + tensor var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(407830976)))]; + tensor var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411107840)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = var_1200_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1215_to_fp16 = const()[name = string("op_1215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411110464)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1215_to_fp16, x = var_1200_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor var_1219_to_fp16 = const()[name = string("op_1219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(414387328)))]; + tensor var_1220_to_fp16 = const()[name = string("op_1220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(417664192)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1220_to_fp16, weight = var_1219_to_fp16, x = var_1200_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor var_1228 = const()[name = string("op_1228"), val = tensor([1, 1500, 20, -1])]; + tensor var_1229_cast_fp16 = reshape(shape = var_1228, x = linear_60_cast_fp16)[name = string("op_1229_cast_fp16")]; + tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1229_cast_fp16, y = const_244_to_fp16)[name = string("q_43_cast_fp16")]; + tensor var_1235 = const()[name = string("op_1235"), val = tensor([1, 1500, 20, -1])]; + tensor var_1236_cast_fp16 = reshape(shape = var_1235, x = linear_61_cast_fp16)[name = string("op_1236_cast_fp16")]; + tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_43_cast_fp16 = mul(x = var_1236_cast_fp16, y = const_245_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_1242 = const()[name = string("op_1242"), val = tensor([1, 1500, 20, -1])]; + tensor var_1243_cast_fp16 = reshape(shape = var_1242, x = linear_62_cast_fp16)[name = string("op_1243_cast_fp16")]; + tensor var_1244 = const()[name = string("op_1244"), val = tensor([0, 2, -3, -1])]; + bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)]; + bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = k_43_cast_fp16)[name = string("transpose_118")]; + tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = q_43_cast_fp16)[name = string("transpose_119")]; + tensor qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_21_cast_fp16")]; + tensor var_1248_cast_fp16 = softmax(axis = var_1184, x = qk_21_cast_fp16)[name = string("op_1248_cast_fp16")]; + bool var_1250_transpose_x_0 = const()[name = string("op_1250_transpose_x_0"), val = bool(false)]; + bool var_1250_transpose_y_0 = const()[name = string("op_1250_transpose_y_0"), val = bool(false)]; + tensor v_43_cast_fp16 = transpose(perm = var_1244, x = var_1243_cast_fp16)[name = string("transpose_117")]; + tensor var_1250_cast_fp16 = matmul(transpose_x = var_1250_transpose_x_0, transpose_y = var_1250_transpose_y_0, x = var_1248_cast_fp16, y = v_43_cast_fp16)[name = string("op_1250_cast_fp16")]; + tensor var_1251 = const()[name = string("op_1251"), val = tensor([0, 2, 1, 3])]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([1, 1500, 1280])]; + tensor var_1252_cast_fp16 = transpose(perm = var_1251, x = var_1250_cast_fp16)[name = string("transpose_116")]; + tensor x_131_cast_fp16 = reshape(shape = concat_10, x = var_1252_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(417666816)))]; + tensor var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420943680)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1257_to_fp16, weight = var_1256_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1264_axes_0 = const()[name = string("op_1264_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420946304)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420948928)))]; + tensor var_1264_cast_fp16 = layer_norm(axes = var_1264_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1264_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(420951552)))]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434058816)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1274_to_fp16, weight = var_1273_to_fp16, x = var_1264_cast_fp16)[name = string("linear_64_cast_fp16")]; + string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")]; + tensor x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_1279_to_fp16 = const()[name = string("op_1279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434069120)))]; + tensor var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447176384)))]; + tensor linear_65_cast_fp16 = linear(bias = var_1280_to_fp16, weight = var_1279_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")]; + int32 var_1290 = const()[name = string("op_1290"), val = int32(-1)]; + tensor var_1306_axes_0 = const()[name = string("op_1306_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447179008)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447181632)))]; + fp16 var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1306_cast_fp16 = layer_norm(axes = var_1306_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1306_cast_fp16")]; + tensor var_1317_to_fp16 = const()[name = string("op_1317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447184256)))]; + tensor var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450461120)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1318_to_fp16, weight = var_1317_to_fp16, x = var_1306_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1321_to_fp16 = const()[name = string("op_1321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450463744)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1321_to_fp16, x = var_1306_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1325_to_fp16 = const()[name = string("op_1325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(453740608)))]; + tensor var_1326_to_fp16 = const()[name = string("op_1326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457017472)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1326_to_fp16, weight = var_1325_to_fp16, x = var_1306_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor var_1334 = const()[name = string("op_1334"), val = tensor([1, 1500, 20, -1])]; + tensor var_1335_cast_fp16 = reshape(shape = var_1334, x = linear_66_cast_fp16)[name = string("op_1335_cast_fp16")]; + tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1335_cast_fp16, y = const_246_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1341 = const()[name = string("op_1341"), val = tensor([1, 1500, 20, -1])]; + tensor var_1342_cast_fp16 = reshape(shape = var_1341, x = linear_67_cast_fp16)[name = string("op_1342_cast_fp16")]; + tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_47_cast_fp16 = mul(x = var_1342_cast_fp16, y = const_247_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_1348 = const()[name = string("op_1348"), val = tensor([1, 1500, 20, -1])]; + tensor var_1349_cast_fp16 = reshape(shape = var_1348, x = linear_68_cast_fp16)[name = string("op_1349_cast_fp16")]; + tensor var_1350 = const()[name = string("op_1350"), val = tensor([0, 2, -3, -1])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = k_47_cast_fp16)[name = string("transpose_114")]; + tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = q_47_cast_fp16)[name = string("transpose_115")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_23_cast_fp16")]; + tensor var_1354_cast_fp16 = softmax(axis = var_1290, x = qk_23_cast_fp16)[name = string("op_1354_cast_fp16")]; + bool var_1356_transpose_x_0 = const()[name = string("op_1356_transpose_x_0"), val = bool(false)]; + bool var_1356_transpose_y_0 = const()[name = string("op_1356_transpose_y_0"), val = bool(false)]; + tensor v_47_cast_fp16 = transpose(perm = var_1350, x = var_1349_cast_fp16)[name = string("transpose_113")]; + tensor var_1356_cast_fp16 = matmul(transpose_x = var_1356_transpose_x_0, transpose_y = var_1356_transpose_y_0, x = var_1354_cast_fp16, y = v_47_cast_fp16)[name = string("op_1356_cast_fp16")]; + tensor var_1357 = const()[name = string("op_1357"), val = tensor([0, 2, 1, 3])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 1500, 1280])]; + tensor var_1358_cast_fp16 = transpose(perm = var_1357, x = var_1356_cast_fp16)[name = string("transpose_112")]; + tensor x_143_cast_fp16 = reshape(shape = concat_11, x = var_1358_cast_fp16)[name = string("x_143_cast_fp16")]; + tensor var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457020096)))]; + tensor var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460296960)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1363_to_fp16, weight = var_1362_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1370_axes_0 = const()[name = string("op_1370_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460299584)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460302208)))]; + tensor var_1370_cast_fp16 = layer_norm(axes = var_1370_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1370_cast_fp16")]; + tensor var_1379_to_fp16 = const()[name = string("op_1379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460304832)))]; + tensor var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473412096)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1380_to_fp16, weight = var_1379_to_fp16, x = var_1370_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")]; + tensor x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473422400)))]; + tensor var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486529664)))]; + tensor linear_71_cast_fp16 = linear(bias = var_1386_to_fp16, weight = var_1385_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")]; + int32 var_1396 = const()[name = string("op_1396"), val = int32(-1)]; + tensor var_1412_axes_0 = const()[name = string("op_1412_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486532288)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486534912)))]; + fp16 var_1402_to_fp16 = const()[name = string("op_1402_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1412_cast_fp16 = layer_norm(axes = var_1412_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1412_cast_fp16")]; + tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486537536)))]; + tensor var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(489814400)))]; + tensor linear_72_cast_fp16 = linear(bias = var_1424_to_fp16, weight = var_1423_to_fp16, x = var_1412_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_1427_to_fp16 = const()[name = string("op_1427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(489817024)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1427_to_fp16, x = var_1412_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_1431_to_fp16 = const()[name = string("op_1431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(493093888)))]; + tensor var_1432_to_fp16 = const()[name = string("op_1432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496370752)))]; + tensor linear_74_cast_fp16 = linear(bias = var_1432_to_fp16, weight = var_1431_to_fp16, x = var_1412_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_1440 = const()[name = string("op_1440"), val = tensor([1, 1500, 20, -1])]; + tensor var_1441_cast_fp16 = reshape(shape = var_1440, x = linear_72_cast_fp16)[name = string("op_1441_cast_fp16")]; + tensor const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1441_cast_fp16, y = const_248_to_fp16)[name = string("q_51_cast_fp16")]; + tensor var_1447 = const()[name = string("op_1447"), val = tensor([1, 1500, 20, -1])]; + tensor var_1448_cast_fp16 = reshape(shape = var_1447, x = linear_73_cast_fp16)[name = string("op_1448_cast_fp16")]; + tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_51_cast_fp16 = mul(x = var_1448_cast_fp16, y = const_249_to_fp16)[name = string("k_51_cast_fp16")]; + tensor var_1454 = const()[name = string("op_1454"), val = tensor([1, 1500, 20, -1])]; + tensor var_1455_cast_fp16 = reshape(shape = var_1454, x = linear_74_cast_fp16)[name = string("op_1455_cast_fp16")]; + tensor var_1456 = const()[name = string("op_1456"), val = tensor([0, 2, -3, -1])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = k_51_cast_fp16)[name = string("transpose_110")]; + tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = q_51_cast_fp16)[name = string("transpose_111")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_25_cast_fp16")]; + tensor var_1460_cast_fp16 = softmax(axis = var_1396, x = qk_25_cast_fp16)[name = string("op_1460_cast_fp16")]; + bool var_1462_transpose_x_0 = const()[name = string("op_1462_transpose_x_0"), val = bool(false)]; + bool var_1462_transpose_y_0 = const()[name = string("op_1462_transpose_y_0"), val = bool(false)]; + tensor v_51_cast_fp16 = transpose(perm = var_1456, x = var_1455_cast_fp16)[name = string("transpose_109")]; + tensor var_1462_cast_fp16 = matmul(transpose_x = var_1462_transpose_x_0, transpose_y = var_1462_transpose_y_0, x = var_1460_cast_fp16, y = v_51_cast_fp16)[name = string("op_1462_cast_fp16")]; + tensor var_1463 = const()[name = string("op_1463"), val = tensor([0, 2, 1, 3])]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([1, 1500, 1280])]; + tensor var_1464_cast_fp16 = transpose(perm = var_1463, x = var_1462_cast_fp16)[name = string("transpose_108")]; + tensor x_155_cast_fp16 = reshape(shape = concat_12, x = var_1464_cast_fp16)[name = string("x_155_cast_fp16")]; + tensor var_1468_to_fp16 = const()[name = string("op_1468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496373376)))]; + tensor var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499650240)))]; + tensor linear_75_cast_fp16 = linear(bias = var_1469_to_fp16, weight = var_1468_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1476_axes_0 = const()[name = string("op_1476_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499652864)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499655488)))]; + tensor var_1476_cast_fp16 = layer_norm(axes = var_1476_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1476_cast_fp16")]; + tensor var_1485_to_fp16 = const()[name = string("op_1485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(499658112)))]; + tensor var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(512765376)))]; + tensor linear_76_cast_fp16 = linear(bias = var_1486_to_fp16, weight = var_1485_to_fp16, x = var_1476_cast_fp16)[name = string("linear_76_cast_fp16")]; + string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")]; + tensor x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")]; + tensor var_1491_to_fp16 = const()[name = string("op_1491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(512775680)))]; + tensor var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525882944)))]; + tensor linear_77_cast_fp16 = linear(bias = var_1492_to_fp16, weight = var_1491_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")]; + int32 var_1502 = const()[name = string("op_1502"), val = int32(-1)]; + tensor var_1518_axes_0 = const()[name = string("op_1518_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525885568)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525888192)))]; + fp16 var_1508_to_fp16 = const()[name = string("op_1508_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1518_cast_fp16 = layer_norm(axes = var_1518_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1518_cast_fp16")]; + tensor var_1529_to_fp16 = const()[name = string("op_1529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(525890816)))]; + tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529167680)))]; + tensor linear_78_cast_fp16 = linear(bias = var_1530_to_fp16, weight = var_1529_to_fp16, x = var_1518_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor var_1533_to_fp16 = const()[name = string("op_1533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529170304)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1533_to_fp16, x = var_1518_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor var_1537_to_fp16 = const()[name = string("op_1537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(532447168)))]; + tensor var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(535724032)))]; + tensor linear_80_cast_fp16 = linear(bias = var_1538_to_fp16, weight = var_1537_to_fp16, x = var_1518_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([1, 1500, 20, -1])]; + tensor var_1547_cast_fp16 = reshape(shape = var_1546, x = linear_78_cast_fp16)[name = string("op_1547_cast_fp16")]; + tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1547_cast_fp16, y = const_250_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1553 = const()[name = string("op_1553"), val = tensor([1, 1500, 20, -1])]; + tensor var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_79_cast_fp16)[name = string("op_1554_cast_fp16")]; + tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1554_cast_fp16, y = const_251_to_fp16)[name = string("k_55_cast_fp16")]; + tensor var_1560 = const()[name = string("op_1560"), val = tensor([1, 1500, 20, -1])]; + tensor var_1561_cast_fp16 = reshape(shape = var_1560, x = linear_80_cast_fp16)[name = string("op_1561_cast_fp16")]; + tensor var_1562 = const()[name = string("op_1562"), val = tensor([0, 2, -3, -1])]; + bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)]; + bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = k_55_cast_fp16)[name = string("transpose_106")]; + tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = q_55_cast_fp16)[name = string("transpose_107")]; + tensor qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_27_cast_fp16")]; + tensor var_1566_cast_fp16 = softmax(axis = var_1502, x = qk_27_cast_fp16)[name = string("op_1566_cast_fp16")]; + bool var_1568_transpose_x_0 = const()[name = string("op_1568_transpose_x_0"), val = bool(false)]; + bool var_1568_transpose_y_0 = const()[name = string("op_1568_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1562, x = var_1561_cast_fp16)[name = string("transpose_105")]; + tensor var_1568_cast_fp16 = matmul(transpose_x = var_1568_transpose_x_0, transpose_y = var_1568_transpose_y_0, x = var_1566_cast_fp16, y = v_55_cast_fp16)[name = string("op_1568_cast_fp16")]; + tensor var_1569 = const()[name = string("op_1569"), val = tensor([0, 2, 1, 3])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([1, 1500, 1280])]; + tensor var_1570_cast_fp16 = transpose(perm = var_1569, x = var_1568_cast_fp16)[name = string("transpose_104")]; + tensor x_167_cast_fp16 = reshape(shape = concat_13, x = var_1570_cast_fp16)[name = string("x_167_cast_fp16")]; + tensor var_1574_to_fp16 = const()[name = string("op_1574_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(535726656)))]; + tensor var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539003520)))]; + tensor linear_81_cast_fp16 = linear(bias = var_1575_to_fp16, weight = var_1574_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_1582_axes_0 = const()[name = string("op_1582_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539006144)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539008768)))]; + tensor var_1582_cast_fp16 = layer_norm(axes = var_1582_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1582_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = string("op_1591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539011392)))]; + tensor var_1592_to_fp16 = const()[name = string("op_1592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552118656)))]; + tensor linear_82_cast_fp16 = linear(bias = var_1592_to_fp16, weight = var_1591_to_fp16, x = var_1582_cast_fp16)[name = string("linear_82_cast_fp16")]; + string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")]; + tensor x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")]; + tensor var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552128960)))]; + tensor var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565236224)))]; + tensor linear_83_cast_fp16 = linear(bias = var_1598_to_fp16, weight = var_1597_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")]; + int32 var_1608 = const()[name = string("op_1608"), val = int32(-1)]; + tensor var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565238848)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565241472)))]; + fp16 var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1624_cast_fp16 = layer_norm(axes = var_1624_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1624_cast_fp16")]; + tensor var_1635_to_fp16 = const()[name = string("op_1635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565244096)))]; + tensor var_1636_to_fp16 = const()[name = string("op_1636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568520960)))]; + tensor linear_84_cast_fp16 = linear(bias = var_1636_to_fp16, weight = var_1635_to_fp16, x = var_1624_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor var_1639_to_fp16 = const()[name = string("op_1639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568523584)))]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1639_to_fp16, x = var_1624_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(571800448)))]; + tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575077312)))]; + tensor linear_86_cast_fp16 = linear(bias = var_1644_to_fp16, weight = var_1643_to_fp16, x = var_1624_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor var_1652 = const()[name = string("op_1652"), val = tensor([1, 1500, 20, -1])]; + tensor var_1653_cast_fp16 = reshape(shape = var_1652, x = linear_84_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_252_to_fp16)[name = string("q_59_cast_fp16")]; + tensor var_1659 = const()[name = string("op_1659"), val = tensor([1, 1500, 20, -1])]; + tensor var_1660_cast_fp16 = reshape(shape = var_1659, x = linear_85_cast_fp16)[name = string("op_1660_cast_fp16")]; + tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1660_cast_fp16, y = const_253_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1666 = const()[name = string("op_1666"), val = tensor([1, 1500, 20, -1])]; + tensor var_1667_cast_fp16 = reshape(shape = var_1666, x = linear_86_cast_fp16)[name = string("op_1667_cast_fp16")]; + tensor var_1668 = const()[name = string("op_1668"), val = tensor([0, 2, -3, -1])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = k_59_cast_fp16)[name = string("transpose_102")]; + tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = q_59_cast_fp16)[name = string("transpose_103")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_29_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_1608, x = qk_29_cast_fp16)[name = string("op_1672_cast_fp16")]; + bool var_1674_transpose_x_0 = const()[name = string("op_1674_transpose_x_0"), val = bool(false)]; + bool var_1674_transpose_y_0 = const()[name = string("op_1674_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1668, x = var_1667_cast_fp16)[name = string("transpose_101")]; + tensor var_1674_cast_fp16 = matmul(transpose_x = var_1674_transpose_x_0, transpose_y = var_1674_transpose_y_0, x = var_1672_cast_fp16, y = v_59_cast_fp16)[name = string("op_1674_cast_fp16")]; + tensor var_1675 = const()[name = string("op_1675"), val = tensor([0, 2, 1, 3])]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 1500, 1280])]; + tensor var_1676_cast_fp16 = transpose(perm = var_1675, x = var_1674_cast_fp16)[name = string("transpose_100")]; + tensor x_179_cast_fp16 = reshape(shape = concat_14, x = var_1676_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575079936)))]; + tensor var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578356800)))]; + tensor linear_87_cast_fp16 = linear(bias = var_1681_to_fp16, weight = var_1680_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1688_axes_0 = const()[name = string("op_1688_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578359424)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578362048)))]; + tensor var_1688_cast_fp16 = layer_norm(axes = var_1688_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1688_cast_fp16")]; + tensor var_1697_to_fp16 = const()[name = string("op_1697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578364672)))]; + tensor var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591471936)))]; + tensor linear_88_cast_fp16 = linear(bias = var_1698_to_fp16, weight = var_1697_to_fp16, x = var_1688_cast_fp16)[name = string("linear_88_cast_fp16")]; + string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")]; + tensor x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1703_to_fp16 = const()[name = string("op_1703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591482240)))]; + tensor var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604589504)))]; + tensor linear_89_cast_fp16 = linear(bias = var_1704_to_fp16, weight = var_1703_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")]; + int32 var_1714 = const()[name = string("op_1714"), val = int32(-1)]; + tensor var_1730_axes_0 = const()[name = string("op_1730_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604592128)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604594752)))]; + fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1730_cast_fp16 = layer_norm(axes = var_1730_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1730_cast_fp16")]; + tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604597376)))]; + tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(607874240)))]; + tensor linear_90_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1730_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_1745_to_fp16 = const()[name = string("op_1745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(607876864)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1745_to_fp16, x = var_1730_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor var_1749_to_fp16 = const()[name = string("op_1749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(611153728)))]; + tensor var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614430592)))]; + tensor linear_92_cast_fp16 = linear(bias = var_1750_to_fp16, weight = var_1749_to_fp16, x = var_1730_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor var_1758 = const()[name = string("op_1758"), val = tensor([1, 1500, 20, -1])]; + tensor var_1759_cast_fp16 = reshape(shape = var_1758, x = linear_90_cast_fp16)[name = string("op_1759_cast_fp16")]; + tensor const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1759_cast_fp16, y = const_254_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1765 = const()[name = string("op_1765"), val = tensor([1, 1500, 20, -1])]; + tensor var_1766_cast_fp16 = reshape(shape = var_1765, x = linear_91_cast_fp16)[name = string("op_1766_cast_fp16")]; + tensor const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_63_cast_fp16 = mul(x = var_1766_cast_fp16, y = const_255_to_fp16)[name = string("k_63_cast_fp16")]; + tensor var_1772 = const()[name = string("op_1772"), val = tensor([1, 1500, 20, -1])]; + tensor var_1773_cast_fp16 = reshape(shape = var_1772, x = linear_92_cast_fp16)[name = string("op_1773_cast_fp16")]; + tensor var_1774 = const()[name = string("op_1774"), val = tensor([0, 2, -3, -1])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = k_63_cast_fp16)[name = string("transpose_98")]; + tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = q_63_cast_fp16)[name = string("transpose_99")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_31_cast_fp16")]; + tensor var_1778_cast_fp16 = softmax(axis = var_1714, x = qk_31_cast_fp16)[name = string("op_1778_cast_fp16")]; + bool var_1780_transpose_x_0 = const()[name = string("op_1780_transpose_x_0"), val = bool(false)]; + bool var_1780_transpose_y_0 = const()[name = string("op_1780_transpose_y_0"), val = bool(false)]; + tensor v_63_cast_fp16 = transpose(perm = var_1774, x = var_1773_cast_fp16)[name = string("transpose_97")]; + tensor var_1780_cast_fp16 = matmul(transpose_x = var_1780_transpose_x_0, transpose_y = var_1780_transpose_y_0, x = var_1778_cast_fp16, y = v_63_cast_fp16)[name = string("op_1780_cast_fp16")]; + tensor var_1781 = const()[name = string("op_1781"), val = tensor([0, 2, 1, 3])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([1, 1500, 1280])]; + tensor var_1782_cast_fp16 = transpose(perm = var_1781, x = var_1780_cast_fp16)[name = string("transpose_96")]; + tensor x_191_cast_fp16 = reshape(shape = concat_15, x = var_1782_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614433216)))]; + tensor var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617710080)))]; + tensor linear_93_cast_fp16 = linear(bias = var_1787_to_fp16, weight = var_1786_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_193_cast_fp16_1 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_1794_axes_0 = const()[name = string("op_1794_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617712704)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617715328)))]; + tensor var_1794_cast_fp16 = layer_norm(axes = var_1794_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16_1)[name = string("op_1794_cast_fp16")]; + tensor var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(617717952)))]; + tensor var_1804_to_fp16 = const()[name = string("op_1804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(630825216)))]; + tensor linear_94_cast_fp16 = linear(bias = var_1804_to_fp16, weight = var_1803_to_fp16, x = var_1794_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")]; + tensor x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(630835520)))]; + tensor var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(643942784)))]; + tensor linear_95_cast_fp16_1 = linear(bias = var_1810_to_fp16, weight = var_1809_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")]; + string x_193_cast_fp16_dtype_0 = const()[name = string("x_193_cast_fp16_dtype_0"), val = string("fp32")]; + string linear_95_cast_fp16_dtype_0 = const()[name = string("linear_95_cast_fp16_dtype_0"), val = string("fp32")]; + tensor linear_95_cast_fp16 = cast(dtype = linear_95_cast_fp16_dtype_0, x = linear_95_cast_fp16_1)[name = string("cast_2")]; + tensor x_193_cast_fp16 = cast(dtype = x_193_cast_fp16_dtype_0, x = x_193_cast_fp16_1)[name = string("cast_3")]; + } -> (x_193_cast_fp16, linear_95_cast_fp16); +} \ No newline at end of file diff --git a/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin b/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fbb183d75a322c46705714925320d8d872f2431 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model0/weights/0-weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d6315a62c6344e1bf4ac88f7f7c8408cc886645c98e8989f249229fd9e9c70 +size 643945408 diff --git a/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin b/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model1/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a +size 108 diff --git a/large-v2/encoder.mlmodelc/model1/coremldata.bin b/large-v2/encoder.mlmodelc/model1/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ea59338ab416594015715ac6994e32a8c96e239 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model1/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70195139816248a2b1fbef695f96decb60b35af6f364f84a7d2293a3d0a09e11 +size 196 diff --git a/large-v2/encoder.mlmodelc/model1/model.mil b/large-v2/encoder.mlmodelc/model1/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..7d3b026fa91fad416f7820629ab7ce05c46aad69 --- /dev/null +++ b/large-v2/encoder.mlmodelc/model1/model.mil @@ -0,0 +1,945 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor linear_95_cast_fp16, tensor x_193_cast_fp16) { + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(64)))]; + string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp16")]; + string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp16")]; + tensor cast_0 = cast(dtype = cast_0_dtype_0, x = linear_95_cast_fp16)[name = string("cast_0")]; + tensor cast_1 = cast(dtype = cast_1_dtype_0, x = x_193_cast_fp16)[name = string("cast_1")]; + tensor x_199_cast_fp16 = add(x = cast_1, y = cast_0)[name = string("x_199_cast_fp16")]; + int32 var_1820 = const()[name = string("op_1820"), val = int32(-1)]; + tensor var_1836_axes_0 = const()[name = string("op_1836_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(2688)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(5312)))]; + fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1836_cast_fp16 = layer_norm(axes = var_1836_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1836_cast_fp16")]; + tensor var_1847_to_fp16 = const()[name = string("op_1847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(7936)))]; + tensor var_1848_to_fp16 = const()[name = string("op_1848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3284800)))]; + tensor linear_96_cast_fp16 = linear(bias = var_1848_to_fp16, weight = var_1847_to_fp16, x = var_1836_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3287424)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1851_to_fp16, x = var_1836_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_1855_to_fp16 = const()[name = string("op_1855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(6564288)))]; + tensor var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9841152)))]; + tensor linear_98_cast_fp16 = linear(bias = var_1856_to_fp16, weight = var_1855_to_fp16, x = var_1836_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_1864 = const()[name = string("op_1864"), val = tensor([1, 1500, 20, -1])]; + tensor var_1865_cast_fp16 = reshape(shape = var_1864, x = linear_96_cast_fp16)[name = string("op_1865_cast_fp16")]; + tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1865_cast_fp16, y = const_256_to_fp16)[name = string("q_67_cast_fp16")]; + tensor var_1871 = const()[name = string("op_1871"), val = tensor([1, 1500, 20, -1])]; + tensor var_1872_cast_fp16 = reshape(shape = var_1871, x = linear_97_cast_fp16)[name = string("op_1872_cast_fp16")]; + tensor const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_67_cast_fp16 = mul(x = var_1872_cast_fp16, y = const_257_to_fp16)[name = string("k_67_cast_fp16")]; + tensor var_1878 = const()[name = string("op_1878"), val = tensor([1, 1500, 20, -1])]; + tensor var_1879_cast_fp16 = reshape(shape = var_1878, x = linear_98_cast_fp16)[name = string("op_1879_cast_fp16")]; + tensor var_1880 = const()[name = string("op_1880"), val = tensor([0, 2, -3, -1])]; + bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)]; + bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = k_67_cast_fp16)[name = string("transpose_158")]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = q_67_cast_fp16)[name = string("transpose_159")]; + tensor qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_33_cast_fp16")]; + tensor var_1884_cast_fp16 = softmax(axis = var_1820, x = qk_33_cast_fp16)[name = string("op_1884_cast_fp16")]; + bool var_1886_transpose_x_0 = const()[name = string("op_1886_transpose_x_0"), val = bool(false)]; + bool var_1886_transpose_y_0 = const()[name = string("op_1886_transpose_y_0"), val = bool(false)]; + tensor v_67_cast_fp16 = transpose(perm = var_1880, x = var_1879_cast_fp16)[name = string("transpose_157")]; + tensor var_1886_cast_fp16 = matmul(transpose_x = var_1886_transpose_x_0, transpose_y = var_1886_transpose_y_0, x = var_1884_cast_fp16, y = v_67_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor var_1887 = const()[name = string("op_1887"), val = tensor([0, 2, 1, 3])]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([1, 1500, 1280])]; + tensor var_1888_cast_fp16 = transpose(perm = var_1887, x = var_1886_cast_fp16)[name = string("transpose_156")]; + tensor x_203_cast_fp16 = reshape(shape = concat_16, x = var_1888_cast_fp16)[name = string("x_203_cast_fp16")]; + tensor var_1892_to_fp16 = const()[name = string("op_1892_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9843776)))]; + tensor var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13120640)))]; + tensor linear_99_cast_fp16 = linear(bias = var_1893_to_fp16, weight = var_1892_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_1900_axes_0 = const()[name = string("op_1900_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13123264)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13125888)))]; + tensor var_1900_cast_fp16 = layer_norm(axes = var_1900_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1900_cast_fp16")]; + tensor var_1909_to_fp16 = const()[name = string("op_1909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13128512)))]; + tensor var_1910_to_fp16 = const()[name = string("op_1910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26235776)))]; + tensor linear_100_cast_fp16 = linear(bias = var_1910_to_fp16, weight = var_1909_to_fp16, x = var_1900_cast_fp16)[name = string("linear_100_cast_fp16")]; + string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")]; + tensor x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26246080)))]; + tensor var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39353344)))]; + tensor linear_101_cast_fp16 = linear(bias = var_1916_to_fp16, weight = var_1915_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")]; + int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)]; + tensor var_1942_axes_0 = const()[name = string("op_1942_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39355968)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39358592)))]; + fp16 var_1932_to_fp16 = const()[name = string("op_1932_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1942_cast_fp16 = layer_norm(axes = var_1942_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1942_cast_fp16")]; + tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39361216)))]; + tensor var_1954_to_fp16 = const()[name = string("op_1954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42638080)))]; + tensor linear_102_cast_fp16 = linear(bias = var_1954_to_fp16, weight = var_1953_to_fp16, x = var_1942_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42640704)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1957_to_fp16, x = var_1942_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor var_1961_to_fp16 = const()[name = string("op_1961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(45917568)))]; + tensor var_1962_to_fp16 = const()[name = string("op_1962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49194432)))]; + tensor linear_104_cast_fp16 = linear(bias = var_1962_to_fp16, weight = var_1961_to_fp16, x = var_1942_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_1970 = const()[name = string("op_1970"), val = tensor([1, 1500, 20, -1])]; + tensor var_1971_cast_fp16 = reshape(shape = var_1970, x = linear_102_cast_fp16)[name = string("op_1971_cast_fp16")]; + tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_1971_cast_fp16, y = const_258_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_1977 = const()[name = string("op_1977"), val = tensor([1, 1500, 20, -1])]; + tensor var_1978_cast_fp16 = reshape(shape = var_1977, x = linear_103_cast_fp16)[name = string("op_1978_cast_fp16")]; + tensor const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_71_cast_fp16 = mul(x = var_1978_cast_fp16, y = const_259_to_fp16)[name = string("k_71_cast_fp16")]; + tensor var_1984 = const()[name = string("op_1984"), val = tensor([1, 1500, 20, -1])]; + tensor var_1985_cast_fp16 = reshape(shape = var_1984, x = linear_104_cast_fp16)[name = string("op_1985_cast_fp16")]; + tensor var_1986 = const()[name = string("op_1986"), val = tensor([0, 2, -3, -1])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = k_71_cast_fp16)[name = string("transpose_154")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = q_71_cast_fp16)[name = string("transpose_155")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_35_cast_fp16")]; + tensor var_1990_cast_fp16 = softmax(axis = var_1926, x = qk_35_cast_fp16)[name = string("op_1990_cast_fp16")]; + bool var_1992_transpose_x_0 = const()[name = string("op_1992_transpose_x_0"), val = bool(false)]; + bool var_1992_transpose_y_0 = const()[name = string("op_1992_transpose_y_0"), val = bool(false)]; + tensor v_71_cast_fp16 = transpose(perm = var_1986, x = var_1985_cast_fp16)[name = string("transpose_153")]; + tensor var_1992_cast_fp16 = matmul(transpose_x = var_1992_transpose_x_0, transpose_y = var_1992_transpose_y_0, x = var_1990_cast_fp16, y = v_71_cast_fp16)[name = string("op_1992_cast_fp16")]; + tensor var_1993 = const()[name = string("op_1993"), val = tensor([0, 2, 1, 3])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 1500, 1280])]; + tensor var_1994_cast_fp16 = transpose(perm = var_1993, x = var_1992_cast_fp16)[name = string("transpose_152")]; + tensor x_215_cast_fp16 = reshape(shape = concat_17, x = var_1994_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49197056)))]; + tensor var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52473920)))]; + tensor linear_105_cast_fp16 = linear(bias = var_1999_to_fp16, weight = var_1998_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2006_axes_0 = const()[name = string("op_2006_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52476544)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52479168)))]; + tensor var_2006_cast_fp16 = layer_norm(axes = var_2006_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_2006_cast_fp16")]; + tensor var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52481792)))]; + tensor var_2016_to_fp16 = const()[name = string("op_2016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65589056)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2016_to_fp16, weight = var_2015_to_fp16, x = var_2006_cast_fp16)[name = string("linear_106_cast_fp16")]; + string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")]; + tensor x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")]; + tensor var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65599360)))]; + tensor var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78706624)))]; + tensor linear_107_cast_fp16 = linear(bias = var_2022_to_fp16, weight = var_2021_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")]; + int32 var_2032 = const()[name = string("op_2032"), val = int32(-1)]; + tensor var_2048_axes_0 = const()[name = string("op_2048_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78709248)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78711872)))]; + fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2048_cast_fp16 = layer_norm(axes = var_2048_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2048_cast_fp16")]; + tensor var_2059_to_fp16 = const()[name = string("op_2059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78714496)))]; + tensor var_2060_to_fp16 = const()[name = string("op_2060_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81991360)))]; + tensor linear_108_cast_fp16 = linear(bias = var_2060_to_fp16, weight = var_2059_to_fp16, x = var_2048_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81993984)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2063_to_fp16, x = var_2048_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(85270848)))]; + tensor var_2068_to_fp16 = const()[name = string("op_2068_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88547712)))]; + tensor linear_110_cast_fp16 = linear(bias = var_2068_to_fp16, weight = var_2067_to_fp16, x = var_2048_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor var_2076 = const()[name = string("op_2076"), val = tensor([1, 1500, 20, -1])]; + tensor var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_108_cast_fp16)[name = string("op_2077_cast_fp16")]; + tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2077_cast_fp16, y = const_260_to_fp16)[name = string("q_75_cast_fp16")]; + tensor var_2083 = const()[name = string("op_2083"), val = tensor([1, 1500, 20, -1])]; + tensor var_2084_cast_fp16 = reshape(shape = var_2083, x = linear_109_cast_fp16)[name = string("op_2084_cast_fp16")]; + tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_2084_cast_fp16, y = const_261_to_fp16)[name = string("k_75_cast_fp16")]; + tensor var_2090 = const()[name = string("op_2090"), val = tensor([1, 1500, 20, -1])]; + tensor var_2091_cast_fp16 = reshape(shape = var_2090, x = linear_110_cast_fp16)[name = string("op_2091_cast_fp16")]; + tensor var_2092 = const()[name = string("op_2092"), val = tensor([0, 2, -3, -1])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = k_75_cast_fp16)[name = string("transpose_150")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = q_75_cast_fp16)[name = string("transpose_151")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_37_cast_fp16")]; + tensor var_2096_cast_fp16 = softmax(axis = var_2032, x = qk_37_cast_fp16)[name = string("op_2096_cast_fp16")]; + bool var_2098_transpose_x_0 = const()[name = string("op_2098_transpose_x_0"), val = bool(false)]; + bool var_2098_transpose_y_0 = const()[name = string("op_2098_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_2092, x = var_2091_cast_fp16)[name = string("transpose_149")]; + tensor var_2098_cast_fp16 = matmul(transpose_x = var_2098_transpose_x_0, transpose_y = var_2098_transpose_y_0, x = var_2096_cast_fp16, y = v_75_cast_fp16)[name = string("op_2098_cast_fp16")]; + tensor var_2099 = const()[name = string("op_2099"), val = tensor([0, 2, 1, 3])]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([1, 1500, 1280])]; + tensor var_2100_cast_fp16 = transpose(perm = var_2099, x = var_2098_cast_fp16)[name = string("transpose_148")]; + tensor x_227_cast_fp16 = reshape(shape = concat_18, x = var_2100_cast_fp16)[name = string("x_227_cast_fp16")]; + tensor var_2104_to_fp16 = const()[name = string("op_2104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88550336)))]; + tensor var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91827200)))]; + tensor linear_111_cast_fp16 = linear(bias = var_2105_to_fp16, weight = var_2104_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2112_axes_0 = const()[name = string("op_2112_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91829824)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91832448)))]; + tensor var_2112_cast_fp16 = layer_norm(axes = var_2112_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2112_cast_fp16")]; + tensor var_2121_to_fp16 = const()[name = string("op_2121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91835072)))]; + tensor var_2122_to_fp16 = const()[name = string("op_2122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104942336)))]; + tensor linear_112_cast_fp16 = linear(bias = var_2122_to_fp16, weight = var_2121_to_fp16, x = var_2112_cast_fp16)[name = string("linear_112_cast_fp16")]; + string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")]; + tensor x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")]; + tensor var_2127_to_fp16 = const()[name = string("op_2127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104952640)))]; + tensor var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118059904)))]; + tensor linear_113_cast_fp16 = linear(bias = var_2128_to_fp16, weight = var_2127_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")]; + int32 var_2138 = const()[name = string("op_2138"), val = int32(-1)]; + tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118062528)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118065152)))]; + fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2154_cast_fp16")]; + tensor var_2165_to_fp16 = const()[name = string("op_2165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118067776)))]; + tensor var_2166_to_fp16 = const()[name = string("op_2166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121344640)))]; + tensor linear_114_cast_fp16 = linear(bias = var_2166_to_fp16, weight = var_2165_to_fp16, x = var_2154_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121347264)))]; + tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2169_to_fp16, x = var_2154_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor var_2173_to_fp16 = const()[name = string("op_2173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(124624128)))]; + tensor var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127900992)))]; + tensor linear_116_cast_fp16 = linear(bias = var_2174_to_fp16, weight = var_2173_to_fp16, x = var_2154_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_2182 = const()[name = string("op_2182"), val = tensor([1, 1500, 20, -1])]; + tensor var_2183_cast_fp16 = reshape(shape = var_2182, x = linear_114_cast_fp16)[name = string("op_2183_cast_fp16")]; + tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2183_cast_fp16, y = const_262_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2189 = const()[name = string("op_2189"), val = tensor([1, 1500, 20, -1])]; + tensor var_2190_cast_fp16 = reshape(shape = var_2189, x = linear_115_cast_fp16)[name = string("op_2190_cast_fp16")]; + tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_2190_cast_fp16, y = const_263_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_2196 = const()[name = string("op_2196"), val = tensor([1, 1500, 20, -1])]; + tensor var_2197_cast_fp16 = reshape(shape = var_2196, x = linear_116_cast_fp16)[name = string("op_2197_cast_fp16")]; + tensor var_2198 = const()[name = string("op_2198"), val = tensor([0, 2, -3, -1])]; + bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)]; + bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = k_79_cast_fp16)[name = string("transpose_146")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = q_79_cast_fp16)[name = string("transpose_147")]; + tensor qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_39_cast_fp16")]; + tensor var_2202_cast_fp16 = softmax(axis = var_2138, x = qk_39_cast_fp16)[name = string("op_2202_cast_fp16")]; + bool var_2204_transpose_x_0 = const()[name = string("op_2204_transpose_x_0"), val = bool(false)]; + bool var_2204_transpose_y_0 = const()[name = string("op_2204_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_2198, x = var_2197_cast_fp16)[name = string("transpose_145")]; + tensor var_2204_cast_fp16 = matmul(transpose_x = var_2204_transpose_x_0, transpose_y = var_2204_transpose_y_0, x = var_2202_cast_fp16, y = v_79_cast_fp16)[name = string("op_2204_cast_fp16")]; + tensor var_2205 = const()[name = string("op_2205"), val = tensor([0, 2, 1, 3])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([1, 1500, 1280])]; + tensor var_2206_cast_fp16 = transpose(perm = var_2205, x = var_2204_cast_fp16)[name = string("transpose_144")]; + tensor x_239_cast_fp16 = reshape(shape = concat_19, x = var_2206_cast_fp16)[name = string("x_239_cast_fp16")]; + tensor var_2210_to_fp16 = const()[name = string("op_2210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127903616)))]; + tensor var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131180480)))]; + tensor linear_117_cast_fp16 = linear(bias = var_2211_to_fp16, weight = var_2210_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2218_axes_0 = const()[name = string("op_2218_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131183104)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131185728)))]; + tensor var_2218_cast_fp16 = layer_norm(axes = var_2218_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2218_cast_fp16")]; + tensor var_2227_to_fp16 = const()[name = string("op_2227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131188352)))]; + tensor var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144295616)))]; + tensor linear_118_cast_fp16 = linear(bias = var_2228_to_fp16, weight = var_2227_to_fp16, x = var_2218_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")]; + tensor x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_2233_to_fp16 = const()[name = string("op_2233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144305920)))]; + tensor var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157413184)))]; + tensor linear_119_cast_fp16 = linear(bias = var_2234_to_fp16, weight = var_2233_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")]; + int32 var_2244 = const()[name = string("op_2244"), val = int32(-1)]; + tensor var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157415808)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157418432)))]; + fp16 var_2250_to_fp16 = const()[name = string("op_2250_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2260_cast_fp16 = layer_norm(axes = var_2260_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2260_cast_fp16")]; + tensor var_2271_to_fp16 = const()[name = string("op_2271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157421056)))]; + tensor var_2272_to_fp16 = const()[name = string("op_2272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160697920)))]; + tensor linear_120_cast_fp16 = linear(bias = var_2272_to_fp16, weight = var_2271_to_fp16, x = var_2260_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_2275_to_fp16 = const()[name = string("op_2275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160700544)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2275_to_fp16, x = var_2260_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_2279_to_fp16 = const()[name = string("op_2279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(163977408)))]; + tensor var_2280_to_fp16 = const()[name = string("op_2280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167254272)))]; + tensor linear_122_cast_fp16 = linear(bias = var_2280_to_fp16, weight = var_2279_to_fp16, x = var_2260_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_2288 = const()[name = string("op_2288"), val = tensor([1, 1500, 20, -1])]; + tensor var_2289_cast_fp16 = reshape(shape = var_2288, x = linear_120_cast_fp16)[name = string("op_2289_cast_fp16")]; + tensor const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2289_cast_fp16, y = const_264_to_fp16)[name = string("q_83_cast_fp16")]; + tensor var_2295 = const()[name = string("op_2295"), val = tensor([1, 1500, 20, -1])]; + tensor var_2296_cast_fp16 = reshape(shape = var_2295, x = linear_121_cast_fp16)[name = string("op_2296_cast_fp16")]; + tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_83_cast_fp16 = mul(x = var_2296_cast_fp16, y = const_265_to_fp16)[name = string("k_83_cast_fp16")]; + tensor var_2302 = const()[name = string("op_2302"), val = tensor([1, 1500, 20, -1])]; + tensor var_2303_cast_fp16 = reshape(shape = var_2302, x = linear_122_cast_fp16)[name = string("op_2303_cast_fp16")]; + tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, -3, -1])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = k_83_cast_fp16)[name = string("transpose_142")]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = q_83_cast_fp16)[name = string("transpose_143")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_41_cast_fp16")]; + tensor var_2308_cast_fp16 = softmax(axis = var_2244, x = qk_41_cast_fp16)[name = string("op_2308_cast_fp16")]; + bool var_2310_transpose_x_0 = const()[name = string("op_2310_transpose_x_0"), val = bool(false)]; + bool var_2310_transpose_y_0 = const()[name = string("op_2310_transpose_y_0"), val = bool(false)]; + tensor v_83_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_141")]; + tensor var_2310_cast_fp16 = matmul(transpose_x = var_2310_transpose_x_0, transpose_y = var_2310_transpose_y_0, x = var_2308_cast_fp16, y = v_83_cast_fp16)[name = string("op_2310_cast_fp16")]; + tensor var_2311 = const()[name = string("op_2311"), val = tensor([0, 2, 1, 3])]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([1, 1500, 1280])]; + tensor var_2312_cast_fp16 = transpose(perm = var_2311, x = var_2310_cast_fp16)[name = string("transpose_140")]; + tensor x_251_cast_fp16 = reshape(shape = concat_20, x = var_2312_cast_fp16)[name = string("x_251_cast_fp16")]; + tensor var_2316_to_fp16 = const()[name = string("op_2316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167256896)))]; + tensor var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170533760)))]; + tensor linear_123_cast_fp16 = linear(bias = var_2317_to_fp16, weight = var_2316_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_2324_axes_0 = const()[name = string("op_2324_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170536384)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170539008)))]; + tensor var_2324_cast_fp16 = layer_norm(axes = var_2324_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2324_cast_fp16")]; + tensor var_2333_to_fp16 = const()[name = string("op_2333_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170541632)))]; + tensor var_2334_to_fp16 = const()[name = string("op_2334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183648896)))]; + tensor linear_124_cast_fp16 = linear(bias = var_2334_to_fp16, weight = var_2333_to_fp16, x = var_2324_cast_fp16)[name = string("linear_124_cast_fp16")]; + string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")]; + tensor x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")]; + tensor var_2339_to_fp16 = const()[name = string("op_2339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183659200)))]; + tensor var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196766464)))]; + tensor linear_125_cast_fp16 = linear(bias = var_2340_to_fp16, weight = var_2339_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")]; + int32 var_2350 = const()[name = string("op_2350"), val = int32(-1)]; + tensor var_2366_axes_0 = const()[name = string("op_2366_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196769088)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196771712)))]; + fp16 var_2356_to_fp16 = const()[name = string("op_2356_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2366_cast_fp16 = layer_norm(axes = var_2366_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2366_cast_fp16")]; + tensor var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196774336)))]; + tensor var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200051200)))]; + tensor linear_126_cast_fp16 = linear(bias = var_2378_to_fp16, weight = var_2377_to_fp16, x = var_2366_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200053824)))]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2381_to_fp16, x = var_2366_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor var_2385_to_fp16 = const()[name = string("op_2385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(203330688)))]; + tensor var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206607552)))]; + tensor linear_128_cast_fp16 = linear(bias = var_2386_to_fp16, weight = var_2385_to_fp16, x = var_2366_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_2394 = const()[name = string("op_2394"), val = tensor([1, 1500, 20, -1])]; + tensor var_2395_cast_fp16 = reshape(shape = var_2394, x = linear_126_cast_fp16)[name = string("op_2395_cast_fp16")]; + tensor const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_266_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2401 = const()[name = string("op_2401"), val = tensor([1, 1500, 20, -1])]; + tensor var_2402_cast_fp16 = reshape(shape = var_2401, x = linear_127_cast_fp16)[name = string("op_2402_cast_fp16")]; + tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_87_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_267_to_fp16)[name = string("k_87_cast_fp16")]; + tensor var_2408 = const()[name = string("op_2408"), val = tensor([1, 1500, 20, -1])]; + tensor var_2409_cast_fp16 = reshape(shape = var_2408, x = linear_128_cast_fp16)[name = string("op_2409_cast_fp16")]; + tensor var_2410 = const()[name = string("op_2410"), val = tensor([0, 2, -3, -1])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = k_87_cast_fp16)[name = string("transpose_138")]; + tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = q_87_cast_fp16)[name = string("transpose_139")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_43_cast_fp16")]; + tensor var_2414_cast_fp16 = softmax(axis = var_2350, x = qk_43_cast_fp16)[name = string("op_2414_cast_fp16")]; + bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)]; + bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)]; + tensor v_87_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_137")]; + tensor var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_87_cast_fp16)[name = string("op_2416_cast_fp16")]; + tensor var_2417 = const()[name = string("op_2417"), val = tensor([0, 2, 1, 3])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([1, 1500, 1280])]; + tensor var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_136")]; + tensor x_263_cast_fp16 = reshape(shape = concat_21, x = var_2418_cast_fp16)[name = string("x_263_cast_fp16")]; + tensor var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206610176)))]; + tensor var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209887040)))]; + tensor linear_129_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209889664)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209892288)))]; + tensor var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2430_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209894912)))]; + tensor var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223002176)))]; + tensor linear_130_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_130_cast_fp16")]; + string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")]; + tensor x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")]; + tensor var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223012480)))]; + tensor var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236119744)))]; + tensor linear_131_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")]; + int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)]; + tensor var_2472_axes_0 = const()[name = string("op_2472_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236122368)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236124992)))]; + fp16 var_2462_to_fp16 = const()[name = string("op_2462_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2472_cast_fp16 = layer_norm(axes = var_2472_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2472_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = string("op_2483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236127616)))]; + tensor var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239404480)))]; + tensor linear_132_cast_fp16 = linear(bias = var_2484_to_fp16, weight = var_2483_to_fp16, x = var_2472_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239407104)))]; + tensor linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2487_to_fp16, x = var_2472_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = string("op_2491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(242683968)))]; + tensor var_2492_to_fp16 = const()[name = string("op_2492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245960832)))]; + tensor linear_134_cast_fp16 = linear(bias = var_2492_to_fp16, weight = var_2491_to_fp16, x = var_2472_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor var_2500 = const()[name = string("op_2500"), val = tensor([1, 1500, 20, -1])]; + tensor var_2501_cast_fp16 = reshape(shape = var_2500, x = linear_132_cast_fp16)[name = string("op_2501_cast_fp16")]; + tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2501_cast_fp16, y = const_268_to_fp16)[name = string("q_91_cast_fp16")]; + tensor var_2507 = const()[name = string("op_2507"), val = tensor([1, 1500, 20, -1])]; + tensor var_2508_cast_fp16 = reshape(shape = var_2507, x = linear_133_cast_fp16)[name = string("op_2508_cast_fp16")]; + tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_91_cast_fp16 = mul(x = var_2508_cast_fp16, y = const_269_to_fp16)[name = string("k_91_cast_fp16")]; + tensor var_2514 = const()[name = string("op_2514"), val = tensor([1, 1500, 20, -1])]; + tensor var_2515_cast_fp16 = reshape(shape = var_2514, x = linear_134_cast_fp16)[name = string("op_2515_cast_fp16")]; + tensor var_2516 = const()[name = string("op_2516"), val = tensor([0, 2, -3, -1])]; + bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)]; + bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = k_91_cast_fp16)[name = string("transpose_134")]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = q_91_cast_fp16)[name = string("transpose_135")]; + tensor qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_45_cast_fp16")]; + tensor var_2520_cast_fp16 = softmax(axis = var_2456, x = qk_45_cast_fp16)[name = string("op_2520_cast_fp16")]; + bool var_2522_transpose_x_0 = const()[name = string("op_2522_transpose_x_0"), val = bool(false)]; + bool var_2522_transpose_y_0 = const()[name = string("op_2522_transpose_y_0"), val = bool(false)]; + tensor v_91_cast_fp16 = transpose(perm = var_2516, x = var_2515_cast_fp16)[name = string("transpose_133")]; + tensor var_2522_cast_fp16 = matmul(transpose_x = var_2522_transpose_x_0, transpose_y = var_2522_transpose_y_0, x = var_2520_cast_fp16, y = v_91_cast_fp16)[name = string("op_2522_cast_fp16")]; + tensor var_2523 = const()[name = string("op_2523"), val = tensor([0, 2, 1, 3])]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([1, 1500, 1280])]; + tensor var_2524_cast_fp16 = transpose(perm = var_2523, x = var_2522_cast_fp16)[name = string("transpose_132")]; + tensor x_275_cast_fp16 = reshape(shape = concat_22, x = var_2524_cast_fp16)[name = string("x_275_cast_fp16")]; + tensor var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245963456)))]; + tensor var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249240320)))]; + tensor linear_135_cast_fp16 = linear(bias = var_2529_to_fp16, weight = var_2528_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_2536_axes_0 = const()[name = string("op_2536_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249242944)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249245568)))]; + tensor var_2536_cast_fp16 = layer_norm(axes = var_2536_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2536_cast_fp16")]; + tensor var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249248192)))]; + tensor var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262355456)))]; + tensor linear_136_cast_fp16 = linear(bias = var_2546_to_fp16, weight = var_2545_to_fp16, x = var_2536_cast_fp16)[name = string("linear_136_cast_fp16")]; + string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")]; + tensor x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")]; + tensor var_2551_to_fp16 = const()[name = string("op_2551_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262365760)))]; + tensor var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275473024)))]; + tensor linear_137_cast_fp16 = linear(bias = var_2552_to_fp16, weight = var_2551_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")]; + int32 var_2562 = const()[name = string("op_2562"), val = int32(-1)]; + tensor var_2578_axes_0 = const()[name = string("op_2578_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275475648)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275478272)))]; + fp16 var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2578_cast_fp16 = layer_norm(axes = var_2578_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2578_cast_fp16")]; + tensor var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275480896)))]; + tensor var_2590_to_fp16 = const()[name = string("op_2590_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278757760)))]; + tensor linear_138_cast_fp16 = linear(bias = var_2590_to_fp16, weight = var_2589_to_fp16, x = var_2578_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_2593_to_fp16 = const()[name = string("op_2593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278760384)))]; + tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2593_to_fp16, x = var_2578_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor var_2597_to_fp16 = const()[name = string("op_2597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(282037248)))]; + tensor var_2598_to_fp16 = const()[name = string("op_2598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285314112)))]; + tensor linear_140_cast_fp16 = linear(bias = var_2598_to_fp16, weight = var_2597_to_fp16, x = var_2578_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor var_2606 = const()[name = string("op_2606"), val = tensor([1, 1500, 20, -1])]; + tensor var_2607_cast_fp16 = reshape(shape = var_2606, x = linear_138_cast_fp16)[name = string("op_2607_cast_fp16")]; + tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_95_cast_fp16 = mul(x = var_2607_cast_fp16, y = const_270_to_fp16)[name = string("q_95_cast_fp16")]; + tensor var_2613 = const()[name = string("op_2613"), val = tensor([1, 1500, 20, -1])]; + tensor var_2614_cast_fp16 = reshape(shape = var_2613, x = linear_139_cast_fp16)[name = string("op_2614_cast_fp16")]; + tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2614_cast_fp16, y = const_271_to_fp16)[name = string("k_95_cast_fp16")]; + tensor var_2620 = const()[name = string("op_2620"), val = tensor([1, 1500, 20, -1])]; + tensor var_2621_cast_fp16 = reshape(shape = var_2620, x = linear_140_cast_fp16)[name = string("op_2621_cast_fp16")]; + tensor var_2622 = const()[name = string("op_2622"), val = tensor([0, 2, -3, -1])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = k_95_cast_fp16)[name = string("transpose_130")]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = q_95_cast_fp16)[name = string("transpose_131")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_47_cast_fp16")]; + tensor var_2626_cast_fp16 = softmax(axis = var_2562, x = qk_47_cast_fp16)[name = string("op_2626_cast_fp16")]; + bool var_2628_transpose_x_0 = const()[name = string("op_2628_transpose_x_0"), val = bool(false)]; + bool var_2628_transpose_y_0 = const()[name = string("op_2628_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2622, x = var_2621_cast_fp16)[name = string("transpose_129")]; + tensor var_2628_cast_fp16 = matmul(transpose_x = var_2628_transpose_x_0, transpose_y = var_2628_transpose_y_0, x = var_2626_cast_fp16, y = v_95_cast_fp16)[name = string("op_2628_cast_fp16")]; + tensor var_2629 = const()[name = string("op_2629"), val = tensor([0, 2, 1, 3])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([1, 1500, 1280])]; + tensor var_2630_cast_fp16 = transpose(perm = var_2629, x = var_2628_cast_fp16)[name = string("transpose_128")]; + tensor x_287_cast_fp16 = reshape(shape = concat_23, x = var_2630_cast_fp16)[name = string("x_287_cast_fp16")]; + tensor var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285316736)))]; + tensor var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288593600)))]; + tensor linear_141_cast_fp16 = linear(bias = var_2635_to_fp16, weight = var_2634_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_2642_axes_0 = const()[name = string("op_2642_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288596224)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288598848)))]; + tensor var_2642_cast_fp16 = layer_norm(axes = var_2642_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2642_cast_fp16")]; + tensor var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288601472)))]; + tensor var_2652_to_fp16 = const()[name = string("op_2652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301708736)))]; + tensor linear_142_cast_fp16 = linear(bias = var_2652_to_fp16, weight = var_2651_to_fp16, x = var_2642_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")]; + tensor x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301719040)))]; + tensor var_2658_to_fp16 = const()[name = string("op_2658_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314826304)))]; + tensor linear_143_cast_fp16 = linear(bias = var_2658_to_fp16, weight = var_2657_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_295_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_295_cast_fp16")]; + int32 var_2668 = const()[name = string("op_2668"), val = int32(-1)]; + tensor var_2684_axes_0 = const()[name = string("op_2684_axes_0"), val = tensor([-1])]; + tensor blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314828928)))]; + tensor blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314831552)))]; + fp16 var_2674_to_fp16 = const()[name = string("op_2674_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2684_cast_fp16 = layer_norm(axes = var_2684_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_295_cast_fp16)[name = string("op_2684_cast_fp16")]; + tensor var_2695_to_fp16 = const()[name = string("op_2695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314834176)))]; + tensor var_2696_to_fp16 = const()[name = string("op_2696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318111040)))]; + tensor linear_144_cast_fp16 = linear(bias = var_2696_to_fp16, weight = var_2695_to_fp16, x = var_2684_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_2699_to_fp16 = const()[name = string("op_2699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318113664)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2699_to_fp16, x = var_2684_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor var_2703_to_fp16 = const()[name = string("op_2703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(321390528)))]; + tensor var_2704_to_fp16 = const()[name = string("op_2704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324667392)))]; + tensor linear_146_cast_fp16 = linear(bias = var_2704_to_fp16, weight = var_2703_to_fp16, x = var_2684_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor var_2712 = const()[name = string("op_2712"), val = tensor([1, 1500, 20, -1])]; + tensor var_2713_cast_fp16 = reshape(shape = var_2712, x = linear_144_cast_fp16)[name = string("op_2713_cast_fp16")]; + tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_99_cast_fp16 = mul(x = var_2713_cast_fp16, y = const_272_to_fp16)[name = string("q_99_cast_fp16")]; + tensor var_2719 = const()[name = string("op_2719"), val = tensor([1, 1500, 20, -1])]; + tensor var_2720_cast_fp16 = reshape(shape = var_2719, x = linear_145_cast_fp16)[name = string("op_2720_cast_fp16")]; + tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2720_cast_fp16, y = const_273_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2726 = const()[name = string("op_2726"), val = tensor([1, 1500, 20, -1])]; + tensor var_2727_cast_fp16 = reshape(shape = var_2726, x = linear_146_cast_fp16)[name = string("op_2727_cast_fp16")]; + tensor var_2728 = const()[name = string("op_2728"), val = tensor([0, 2, -3, -1])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = k_99_cast_fp16)[name = string("transpose_126")]; + tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = q_99_cast_fp16)[name = string("transpose_127")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_49_cast_fp16")]; + tensor var_2732_cast_fp16 = softmax(axis = var_2668, x = qk_49_cast_fp16)[name = string("op_2732_cast_fp16")]; + bool var_2734_transpose_x_0 = const()[name = string("op_2734_transpose_x_0"), val = bool(false)]; + bool var_2734_transpose_y_0 = const()[name = string("op_2734_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2728, x = var_2727_cast_fp16)[name = string("transpose_125")]; + tensor var_2734_cast_fp16 = matmul(transpose_x = var_2734_transpose_x_0, transpose_y = var_2734_transpose_y_0, x = var_2732_cast_fp16, y = v_99_cast_fp16)[name = string("op_2734_cast_fp16")]; + tensor var_2735 = const()[name = string("op_2735"), val = tensor([0, 2, 1, 3])]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([1, 1500, 1280])]; + tensor var_2736_cast_fp16 = transpose(perm = var_2735, x = var_2734_cast_fp16)[name = string("transpose_124")]; + tensor x_299_cast_fp16 = reshape(shape = concat_24, x = var_2736_cast_fp16)[name = string("x_299_cast_fp16")]; + tensor var_2740_to_fp16 = const()[name = string("op_2740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324670016)))]; + tensor var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327946880)))]; + tensor linear_147_cast_fp16 = linear(bias = var_2741_to_fp16, weight = var_2740_to_fp16, x = x_299_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor x_301_cast_fp16 = add(x = x_295_cast_fp16, y = linear_147_cast_fp16)[name = string("x_301_cast_fp16")]; + tensor var_2748_axes_0 = const()[name = string("op_2748_axes_0"), val = tensor([-1])]; + tensor blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327949504)))]; + tensor blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327952128)))]; + tensor var_2748_cast_fp16 = layer_norm(axes = var_2748_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_301_cast_fp16)[name = string("op_2748_cast_fp16")]; + tensor var_2757_to_fp16 = const()[name = string("op_2757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327954752)))]; + tensor var_2758_to_fp16 = const()[name = string("op_2758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341062016)))]; + tensor linear_148_cast_fp16 = linear(bias = var_2758_to_fp16, weight = var_2757_to_fp16, x = var_2748_cast_fp16)[name = string("linear_148_cast_fp16")]; + string x_305_mode_0 = const()[name = string("x_305_mode_0"), val = string("EXACT")]; + tensor x_305_cast_fp16 = gelu(mode = x_305_mode_0, x = linear_148_cast_fp16)[name = string("x_305_cast_fp16")]; + tensor var_2763_to_fp16 = const()[name = string("op_2763_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341072320)))]; + tensor var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354179584)))]; + tensor linear_149_cast_fp16 = linear(bias = var_2764_to_fp16, weight = var_2763_to_fp16, x = x_305_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor x_307_cast_fp16 = add(x = x_301_cast_fp16, y = linear_149_cast_fp16)[name = string("x_307_cast_fp16")]; + int32 var_2774 = const()[name = string("op_2774"), val = int32(-1)]; + tensor var_2790_axes_0 = const()[name = string("op_2790_axes_0"), val = tensor([-1])]; + tensor blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354182208)))]; + tensor blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354184832)))]; + fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2790_cast_fp16 = layer_norm(axes = var_2790_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_307_cast_fp16)[name = string("op_2790_cast_fp16")]; + tensor var_2801_to_fp16 = const()[name = string("op_2801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354187456)))]; + tensor var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357464320)))]; + tensor linear_150_cast_fp16 = linear(bias = var_2802_to_fp16, weight = var_2801_to_fp16, x = var_2790_cast_fp16)[name = string("linear_150_cast_fp16")]; + tensor var_2805_to_fp16 = const()[name = string("op_2805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357466944)))]; + tensor linear_151_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2805_to_fp16, x = var_2790_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(360743808)))]; + tensor var_2810_to_fp16 = const()[name = string("op_2810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364020672)))]; + tensor linear_152_cast_fp16 = linear(bias = var_2810_to_fp16, weight = var_2809_to_fp16, x = var_2790_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor var_2818 = const()[name = string("op_2818"), val = tensor([1, 1500, 20, -1])]; + tensor var_2819_cast_fp16 = reshape(shape = var_2818, x = linear_150_cast_fp16)[name = string("op_2819_cast_fp16")]; + tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_103_cast_fp16 = mul(x = var_2819_cast_fp16, y = const_274_to_fp16)[name = string("q_103_cast_fp16")]; + tensor var_2825 = const()[name = string("op_2825"), val = tensor([1, 1500, 20, -1])]; + tensor var_2826_cast_fp16 = reshape(shape = var_2825, x = linear_151_cast_fp16)[name = string("op_2826_cast_fp16")]; + tensor const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_103_cast_fp16 = mul(x = var_2826_cast_fp16, y = const_275_to_fp16)[name = string("k_103_cast_fp16")]; + tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 1500, 20, -1])]; + tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = linear_152_cast_fp16)[name = string("op_2833_cast_fp16")]; + tensor var_2834 = const()[name = string("op_2834"), val = tensor([0, 2, -3, -1])]; + bool qk_51_transpose_x_0 = const()[name = string("qk_51_transpose_x_0"), val = bool(false)]; + bool qk_51_transpose_y_0 = const()[name = string("qk_51_transpose_y_0"), val = bool(false)]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = k_103_cast_fp16)[name = string("transpose_122")]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = q_103_cast_fp16)[name = string("transpose_123")]; + tensor qk_51_cast_fp16 = matmul(transpose_x = qk_51_transpose_x_0, transpose_y = qk_51_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_51_cast_fp16")]; + tensor var_2838_cast_fp16 = softmax(axis = var_2774, x = qk_51_cast_fp16)[name = string("op_2838_cast_fp16")]; + bool var_2840_transpose_x_0 = const()[name = string("op_2840_transpose_x_0"), val = bool(false)]; + bool var_2840_transpose_y_0 = const()[name = string("op_2840_transpose_y_0"), val = bool(false)]; + tensor v_103_cast_fp16 = transpose(perm = var_2834, x = var_2833_cast_fp16)[name = string("transpose_121")]; + tensor var_2840_cast_fp16 = matmul(transpose_x = var_2840_transpose_x_0, transpose_y = var_2840_transpose_y_0, x = var_2838_cast_fp16, y = v_103_cast_fp16)[name = string("op_2840_cast_fp16")]; + tensor var_2841 = const()[name = string("op_2841"), val = tensor([0, 2, 1, 3])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([1, 1500, 1280])]; + tensor var_2842_cast_fp16 = transpose(perm = var_2841, x = var_2840_cast_fp16)[name = string("transpose_120")]; + tensor x_311_cast_fp16 = reshape(shape = concat_25, x = var_2842_cast_fp16)[name = string("x_311_cast_fp16")]; + tensor var_2846_to_fp16 = const()[name = string("op_2846_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364023296)))]; + tensor var_2847_to_fp16 = const()[name = string("op_2847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367300160)))]; + tensor linear_153_cast_fp16 = linear(bias = var_2847_to_fp16, weight = var_2846_to_fp16, x = x_311_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor x_313_cast_fp16 = add(x = x_307_cast_fp16, y = linear_153_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_2854_axes_0 = const()[name = string("op_2854_axes_0"), val = tensor([-1])]; + tensor blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367302784)))]; + tensor blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367305408)))]; + tensor var_2854_cast_fp16 = layer_norm(axes = var_2854_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_313_cast_fp16)[name = string("op_2854_cast_fp16")]; + tensor var_2863_to_fp16 = const()[name = string("op_2863_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367308032)))]; + tensor var_2864_to_fp16 = const()[name = string("op_2864_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380415296)))]; + tensor linear_154_cast_fp16 = linear(bias = var_2864_to_fp16, weight = var_2863_to_fp16, x = var_2854_cast_fp16)[name = string("linear_154_cast_fp16")]; + string x_317_mode_0 = const()[name = string("x_317_mode_0"), val = string("EXACT")]; + tensor x_317_cast_fp16 = gelu(mode = x_317_mode_0, x = linear_154_cast_fp16)[name = string("x_317_cast_fp16")]; + tensor var_2869_to_fp16 = const()[name = string("op_2869_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380425600)))]; + tensor var_2870_to_fp16 = const()[name = string("op_2870_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393532864)))]; + tensor linear_155_cast_fp16 = linear(bias = var_2870_to_fp16, weight = var_2869_to_fp16, x = x_317_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor x_319_cast_fp16 = add(x = x_313_cast_fp16, y = linear_155_cast_fp16)[name = string("x_319_cast_fp16")]; + int32 var_2880 = const()[name = string("op_2880"), val = int32(-1)]; + tensor var_2896_axes_0 = const()[name = string("op_2896_axes_0"), val = tensor([-1])]; + tensor blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393535488)))]; + tensor blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393538112)))]; + fp16 var_2886_to_fp16 = const()[name = string("op_2886_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2896_cast_fp16 = layer_norm(axes = var_2896_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_319_cast_fp16)[name = string("op_2896_cast_fp16")]; + tensor var_2907_to_fp16 = const()[name = string("op_2907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393540736)))]; + tensor var_2908_to_fp16 = const()[name = string("op_2908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396817600)))]; + tensor linear_156_cast_fp16 = linear(bias = var_2908_to_fp16, weight = var_2907_to_fp16, x = var_2896_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396820224)))]; + tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2911_to_fp16, x = var_2896_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor var_2915_to_fp16 = const()[name = string("op_2915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(400097088)))]; + tensor var_2916_to_fp16 = const()[name = string("op_2916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403373952)))]; + tensor linear_158_cast_fp16 = linear(bias = var_2916_to_fp16, weight = var_2915_to_fp16, x = var_2896_cast_fp16)[name = string("linear_158_cast_fp16")]; + tensor var_2924 = const()[name = string("op_2924"), val = tensor([1, 1500, 20, -1])]; + tensor var_2925_cast_fp16 = reshape(shape = var_2924, x = linear_156_cast_fp16)[name = string("op_2925_cast_fp16")]; + tensor const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_107_cast_fp16 = mul(x = var_2925_cast_fp16, y = const_276_to_fp16)[name = string("q_107_cast_fp16")]; + tensor var_2931 = const()[name = string("op_2931"), val = tensor([1, 1500, 20, -1])]; + tensor var_2932_cast_fp16 = reshape(shape = var_2931, x = linear_157_cast_fp16)[name = string("op_2932_cast_fp16")]; + tensor const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_107_cast_fp16 = mul(x = var_2932_cast_fp16, y = const_277_to_fp16)[name = string("k_107_cast_fp16")]; + tensor var_2938 = const()[name = string("op_2938"), val = tensor([1, 1500, 20, -1])]; + tensor var_2939_cast_fp16 = reshape(shape = var_2938, x = linear_158_cast_fp16)[name = string("op_2939_cast_fp16")]; + tensor var_2940 = const()[name = string("op_2940"), val = tensor([0, 2, -3, -1])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = k_107_cast_fp16)[name = string("transpose_118")]; + tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = q_107_cast_fp16)[name = string("transpose_119")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_53_cast_fp16")]; + tensor var_2944_cast_fp16 = softmax(axis = var_2880, x = qk_53_cast_fp16)[name = string("op_2944_cast_fp16")]; + bool var_2946_transpose_x_0 = const()[name = string("op_2946_transpose_x_0"), val = bool(false)]; + bool var_2946_transpose_y_0 = const()[name = string("op_2946_transpose_y_0"), val = bool(false)]; + tensor v_107_cast_fp16 = transpose(perm = var_2940, x = var_2939_cast_fp16)[name = string("transpose_117")]; + tensor var_2946_cast_fp16 = matmul(transpose_x = var_2946_transpose_x_0, transpose_y = var_2946_transpose_y_0, x = var_2944_cast_fp16, y = v_107_cast_fp16)[name = string("op_2946_cast_fp16")]; + tensor var_2947 = const()[name = string("op_2947"), val = tensor([0, 2, 1, 3])]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([1, 1500, 1280])]; + tensor var_2948_cast_fp16 = transpose(perm = var_2947, x = var_2946_cast_fp16)[name = string("transpose_116")]; + tensor x_323_cast_fp16 = reshape(shape = concat_26, x = var_2948_cast_fp16)[name = string("x_323_cast_fp16")]; + tensor var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403376576)))]; + tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406653440)))]; + tensor linear_159_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = x_323_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor x_325_cast_fp16 = add(x = x_319_cast_fp16, y = linear_159_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_2960_axes_0 = const()[name = string("op_2960_axes_0"), val = tensor([-1])]; + tensor blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406656064)))]; + tensor blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406658688)))]; + tensor var_2960_cast_fp16 = layer_norm(axes = var_2960_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_325_cast_fp16)[name = string("op_2960_cast_fp16")]; + tensor var_2969_to_fp16 = const()[name = string("op_2969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406661312)))]; + tensor var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419768576)))]; + tensor linear_160_cast_fp16 = linear(bias = var_2970_to_fp16, weight = var_2969_to_fp16, x = var_2960_cast_fp16)[name = string("linear_160_cast_fp16")]; + string x_329_mode_0 = const()[name = string("x_329_mode_0"), val = string("EXACT")]; + tensor x_329_cast_fp16 = gelu(mode = x_329_mode_0, x = linear_160_cast_fp16)[name = string("x_329_cast_fp16")]; + tensor var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419778880)))]; + tensor var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432886144)))]; + tensor linear_161_cast_fp16 = linear(bias = var_2976_to_fp16, weight = var_2975_to_fp16, x = x_329_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor x_331_cast_fp16 = add(x = x_325_cast_fp16, y = linear_161_cast_fp16)[name = string("x_331_cast_fp16")]; + int32 var_2986 = const()[name = string("op_2986"), val = int32(-1)]; + tensor var_3002_axes_0 = const()[name = string("op_3002_axes_0"), val = tensor([-1])]; + tensor blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432888768)))]; + tensor blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432891392)))]; + fp16 var_2992_to_fp16 = const()[name = string("op_2992_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3002_cast_fp16 = layer_norm(axes = var_3002_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_331_cast_fp16)[name = string("op_3002_cast_fp16")]; + tensor var_3013_to_fp16 = const()[name = string("op_3013_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432894016)))]; + tensor var_3014_to_fp16 = const()[name = string("op_3014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436170880)))]; + tensor linear_162_cast_fp16 = linear(bias = var_3014_to_fp16, weight = var_3013_to_fp16, x = var_3002_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436173504)))]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3017_to_fp16, x = var_3002_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor var_3021_to_fp16 = const()[name = string("op_3021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(439450368)))]; + tensor var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442727232)))]; + tensor linear_164_cast_fp16 = linear(bias = var_3022_to_fp16, weight = var_3021_to_fp16, x = var_3002_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor var_3030 = const()[name = string("op_3030"), val = tensor([1, 1500, 20, -1])]; + tensor var_3031_cast_fp16 = reshape(shape = var_3030, x = linear_162_cast_fp16)[name = string("op_3031_cast_fp16")]; + tensor const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_111_cast_fp16 = mul(x = var_3031_cast_fp16, y = const_278_to_fp16)[name = string("q_111_cast_fp16")]; + tensor var_3037 = const()[name = string("op_3037"), val = tensor([1, 1500, 20, -1])]; + tensor var_3038_cast_fp16 = reshape(shape = var_3037, x = linear_163_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_111_cast_fp16 = mul(x = var_3038_cast_fp16, y = const_279_to_fp16)[name = string("k_111_cast_fp16")]; + tensor var_3044 = const()[name = string("op_3044"), val = tensor([1, 1500, 20, -1])]; + tensor var_3045_cast_fp16 = reshape(shape = var_3044, x = linear_164_cast_fp16)[name = string("op_3045_cast_fp16")]; + tensor var_3046 = const()[name = string("op_3046"), val = tensor([0, 2, -3, -1])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = k_111_cast_fp16)[name = string("transpose_114")]; + tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = q_111_cast_fp16)[name = string("transpose_115")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_55_cast_fp16")]; + tensor var_3050_cast_fp16 = softmax(axis = var_2986, x = qk_55_cast_fp16)[name = string("op_3050_cast_fp16")]; + bool var_3052_transpose_x_0 = const()[name = string("op_3052_transpose_x_0"), val = bool(false)]; + bool var_3052_transpose_y_0 = const()[name = string("op_3052_transpose_y_0"), val = bool(false)]; + tensor v_111_cast_fp16 = transpose(perm = var_3046, x = var_3045_cast_fp16)[name = string("transpose_113")]; + tensor var_3052_cast_fp16 = matmul(transpose_x = var_3052_transpose_x_0, transpose_y = var_3052_transpose_y_0, x = var_3050_cast_fp16, y = v_111_cast_fp16)[name = string("op_3052_cast_fp16")]; + tensor var_3053 = const()[name = string("op_3053"), val = tensor([0, 2, 1, 3])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([1, 1500, 1280])]; + tensor var_3054_cast_fp16 = transpose(perm = var_3053, x = var_3052_cast_fp16)[name = string("transpose_112")]; + tensor x_335_cast_fp16 = reshape(shape = concat_27, x = var_3054_cast_fp16)[name = string("x_335_cast_fp16")]; + tensor var_3058_to_fp16 = const()[name = string("op_3058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442729856)))]; + tensor var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446006720)))]; + tensor linear_165_cast_fp16 = linear(bias = var_3059_to_fp16, weight = var_3058_to_fp16, x = x_335_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_165_cast_fp16)[name = string("x_337_cast_fp16")]; + tensor var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor([-1])]; + tensor blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446009344)))]; + tensor blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446011968)))]; + tensor var_3066_cast_fp16 = layer_norm(axes = var_3066_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_337_cast_fp16)[name = string("op_3066_cast_fp16")]; + tensor var_3075_to_fp16 = const()[name = string("op_3075_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446014592)))]; + tensor var_3076_to_fp16 = const()[name = string("op_3076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459121856)))]; + tensor linear_166_cast_fp16 = linear(bias = var_3076_to_fp16, weight = var_3075_to_fp16, x = var_3066_cast_fp16)[name = string("linear_166_cast_fp16")]; + string x_341_mode_0 = const()[name = string("x_341_mode_0"), val = string("EXACT")]; + tensor x_341_cast_fp16 = gelu(mode = x_341_mode_0, x = linear_166_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_3081_to_fp16 = const()[name = string("op_3081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459132160)))]; + tensor var_3082_to_fp16 = const()[name = string("op_3082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472239424)))]; + tensor linear_167_cast_fp16 = linear(bias = var_3082_to_fp16, weight = var_3081_to_fp16, x = x_341_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor x_343_cast_fp16 = add(x = x_337_cast_fp16, y = linear_167_cast_fp16)[name = string("x_343_cast_fp16")]; + int32 var_3092 = const()[name = string("op_3092"), val = int32(-1)]; + tensor var_3108_axes_0 = const()[name = string("op_3108_axes_0"), val = tensor([-1])]; + tensor blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472242048)))]; + tensor blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472244672)))]; + fp16 var_3098_to_fp16 = const()[name = string("op_3098_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3108_cast_fp16 = layer_norm(axes = var_3108_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_343_cast_fp16)[name = string("op_3108_cast_fp16")]; + tensor var_3119_to_fp16 = const()[name = string("op_3119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472247296)))]; + tensor var_3120_to_fp16 = const()[name = string("op_3120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475524160)))]; + tensor linear_168_cast_fp16 = linear(bias = var_3120_to_fp16, weight = var_3119_to_fp16, x = var_3108_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor var_3123_to_fp16 = const()[name = string("op_3123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475526784)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3123_to_fp16, x = var_3108_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor var_3127_to_fp16 = const()[name = string("op_3127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(478803648)))]; + tensor var_3128_to_fp16 = const()[name = string("op_3128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482080512)))]; + tensor linear_170_cast_fp16 = linear(bias = var_3128_to_fp16, weight = var_3127_to_fp16, x = var_3108_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor var_3136 = const()[name = string("op_3136"), val = tensor([1, 1500, 20, -1])]; + tensor var_3137_cast_fp16 = reshape(shape = var_3136, x = linear_168_cast_fp16)[name = string("op_3137_cast_fp16")]; + tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_115_cast_fp16 = mul(x = var_3137_cast_fp16, y = const_280_to_fp16)[name = string("q_115_cast_fp16")]; + tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 1500, 20, -1])]; + tensor var_3144_cast_fp16 = reshape(shape = var_3143, x = linear_169_cast_fp16)[name = string("op_3144_cast_fp16")]; + tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_3144_cast_fp16, y = const_281_to_fp16)[name = string("k_115_cast_fp16")]; + tensor var_3150 = const()[name = string("op_3150"), val = tensor([1, 1500, 20, -1])]; + tensor var_3151_cast_fp16 = reshape(shape = var_3150, x = linear_170_cast_fp16)[name = string("op_3151_cast_fp16")]; + tensor var_3152 = const()[name = string("op_3152"), val = tensor([0, 2, -3, -1])]; + bool qk_57_transpose_x_0 = const()[name = string("qk_57_transpose_x_0"), val = bool(false)]; + bool qk_57_transpose_y_0 = const()[name = string("qk_57_transpose_y_0"), val = bool(false)]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = k_115_cast_fp16)[name = string("transpose_110")]; + tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = q_115_cast_fp16)[name = string("transpose_111")]; + tensor qk_57_cast_fp16 = matmul(transpose_x = qk_57_transpose_x_0, transpose_y = qk_57_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_57_cast_fp16")]; + tensor var_3156_cast_fp16 = softmax(axis = var_3092, x = qk_57_cast_fp16)[name = string("op_3156_cast_fp16")]; + bool var_3158_transpose_x_0 = const()[name = string("op_3158_transpose_x_0"), val = bool(false)]; + bool var_3158_transpose_y_0 = const()[name = string("op_3158_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_3152, x = var_3151_cast_fp16)[name = string("transpose_109")]; + tensor var_3158_cast_fp16 = matmul(transpose_x = var_3158_transpose_x_0, transpose_y = var_3158_transpose_y_0, x = var_3156_cast_fp16, y = v_115_cast_fp16)[name = string("op_3158_cast_fp16")]; + tensor var_3159 = const()[name = string("op_3159"), val = tensor([0, 2, 1, 3])]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([1, 1500, 1280])]; + tensor var_3160_cast_fp16 = transpose(perm = var_3159, x = var_3158_cast_fp16)[name = string("transpose_108")]; + tensor x_347_cast_fp16 = reshape(shape = concat_28, x = var_3160_cast_fp16)[name = string("x_347_cast_fp16")]; + tensor var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482083136)))]; + tensor var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485360000)))]; + tensor linear_171_cast_fp16 = linear(bias = var_3165_to_fp16, weight = var_3164_to_fp16, x = x_347_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor x_349_cast_fp16 = add(x = x_343_cast_fp16, y = linear_171_cast_fp16)[name = string("x_349_cast_fp16")]; + tensor var_3172_axes_0 = const()[name = string("op_3172_axes_0"), val = tensor([-1])]; + tensor blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485362624)))]; + tensor blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485365248)))]; + tensor var_3172_cast_fp16 = layer_norm(axes = var_3172_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_349_cast_fp16)[name = string("op_3172_cast_fp16")]; + tensor var_3181_to_fp16 = const()[name = string("op_3181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485367872)))]; + tensor var_3182_to_fp16 = const()[name = string("op_3182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498475136)))]; + tensor linear_172_cast_fp16 = linear(bias = var_3182_to_fp16, weight = var_3181_to_fp16, x = var_3172_cast_fp16)[name = string("linear_172_cast_fp16")]; + string x_353_mode_0 = const()[name = string("x_353_mode_0"), val = string("EXACT")]; + tensor x_353_cast_fp16 = gelu(mode = x_353_mode_0, x = linear_172_cast_fp16)[name = string("x_353_cast_fp16")]; + tensor var_3187_to_fp16 = const()[name = string("op_3187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498485440)))]; + tensor var_3188_to_fp16 = const()[name = string("op_3188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511592704)))]; + tensor linear_173_cast_fp16 = linear(bias = var_3188_to_fp16, weight = var_3187_to_fp16, x = x_353_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor x_355_cast_fp16 = add(x = x_349_cast_fp16, y = linear_173_cast_fp16)[name = string("x_355_cast_fp16")]; + int32 var_3198 = const()[name = string("op_3198"), val = int32(-1)]; + tensor var_3214_axes_0 = const()[name = string("op_3214_axes_0"), val = tensor([-1])]; + tensor blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511595328)))]; + tensor blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511597952)))]; + fp16 var_3204_to_fp16 = const()[name = string("op_3204_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3214_cast_fp16 = layer_norm(axes = var_3214_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_355_cast_fp16)[name = string("op_3214_cast_fp16")]; + tensor var_3225_to_fp16 = const()[name = string("op_3225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511600576)))]; + tensor var_3226_to_fp16 = const()[name = string("op_3226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514877440)))]; + tensor linear_174_cast_fp16 = linear(bias = var_3226_to_fp16, weight = var_3225_to_fp16, x = var_3214_cast_fp16)[name = string("linear_174_cast_fp16")]; + tensor var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514880064)))]; + tensor linear_175_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3229_to_fp16, x = var_3214_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(518156928)))]; + tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521433792)))]; + tensor linear_176_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = var_3214_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor var_3242 = const()[name = string("op_3242"), val = tensor([1, 1500, 20, -1])]; + tensor var_3243_cast_fp16 = reshape(shape = var_3242, x = linear_174_cast_fp16)[name = string("op_3243_cast_fp16")]; + tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_119_cast_fp16 = mul(x = var_3243_cast_fp16, y = const_282_to_fp16)[name = string("q_119_cast_fp16")]; + tensor var_3249 = const()[name = string("op_3249"), val = tensor([1, 1500, 20, -1])]; + tensor var_3250_cast_fp16 = reshape(shape = var_3249, x = linear_175_cast_fp16)[name = string("op_3250_cast_fp16")]; + tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_119_cast_fp16 = mul(x = var_3250_cast_fp16, y = const_283_to_fp16)[name = string("k_119_cast_fp16")]; + tensor var_3256 = const()[name = string("op_3256"), val = tensor([1, 1500, 20, -1])]; + tensor var_3257_cast_fp16 = reshape(shape = var_3256, x = linear_176_cast_fp16)[name = string("op_3257_cast_fp16")]; + tensor var_3258 = const()[name = string("op_3258"), val = tensor([0, 2, -3, -1])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = k_119_cast_fp16)[name = string("transpose_106")]; + tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = q_119_cast_fp16)[name = string("transpose_107")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_59_cast_fp16")]; + tensor var_3262_cast_fp16 = softmax(axis = var_3198, x = qk_59_cast_fp16)[name = string("op_3262_cast_fp16")]; + bool var_3264_transpose_x_0 = const()[name = string("op_3264_transpose_x_0"), val = bool(false)]; + bool var_3264_transpose_y_0 = const()[name = string("op_3264_transpose_y_0"), val = bool(false)]; + tensor v_119_cast_fp16 = transpose(perm = var_3258, x = var_3257_cast_fp16)[name = string("transpose_105")]; + tensor var_3264_cast_fp16 = matmul(transpose_x = var_3264_transpose_x_0, transpose_y = var_3264_transpose_y_0, x = var_3262_cast_fp16, y = v_119_cast_fp16)[name = string("op_3264_cast_fp16")]; + tensor var_3265 = const()[name = string("op_3265"), val = tensor([0, 2, 1, 3])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([1, 1500, 1280])]; + tensor var_3266_cast_fp16 = transpose(perm = var_3265, x = var_3264_cast_fp16)[name = string("transpose_104")]; + tensor x_359_cast_fp16 = reshape(shape = concat_29, x = var_3266_cast_fp16)[name = string("x_359_cast_fp16")]; + tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521436416)))]; + tensor var_3271_to_fp16 = const()[name = string("op_3271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524713280)))]; + tensor linear_177_cast_fp16 = linear(bias = var_3271_to_fp16, weight = var_3270_to_fp16, x = x_359_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor x_361_cast_fp16 = add(x = x_355_cast_fp16, y = linear_177_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_3278_axes_0 = const()[name = string("op_3278_axes_0"), val = tensor([-1])]; + tensor blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524715904)))]; + tensor blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524718528)))]; + tensor var_3278_cast_fp16 = layer_norm(axes = var_3278_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_361_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor var_3287_to_fp16 = const()[name = string("op_3287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524721152)))]; + tensor var_3288_to_fp16 = const()[name = string("op_3288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537828416)))]; + tensor linear_178_cast_fp16 = linear(bias = var_3288_to_fp16, weight = var_3287_to_fp16, x = var_3278_cast_fp16)[name = string("linear_178_cast_fp16")]; + string x_365_mode_0 = const()[name = string("x_365_mode_0"), val = string("EXACT")]; + tensor x_365_cast_fp16 = gelu(mode = x_365_mode_0, x = linear_178_cast_fp16)[name = string("x_365_cast_fp16")]; + tensor var_3293_to_fp16 = const()[name = string("op_3293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537838720)))]; + tensor var_3294_to_fp16 = const()[name = string("op_3294_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550945984)))]; + tensor linear_179_cast_fp16 = linear(bias = var_3294_to_fp16, weight = var_3293_to_fp16, x = x_365_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor x_367_cast_fp16 = add(x = x_361_cast_fp16, y = linear_179_cast_fp16)[name = string("x_367_cast_fp16")]; + int32 var_3304 = const()[name = string("op_3304"), val = int32(-1)]; + tensor var_3320_axes_0 = const()[name = string("op_3320_axes_0"), val = tensor([-1])]; + tensor blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550948608)))]; + tensor blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550951232)))]; + fp16 var_3310_to_fp16 = const()[name = string("op_3310_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3320_cast_fp16 = layer_norm(axes = var_3320_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_367_cast_fp16)[name = string("op_3320_cast_fp16")]; + tensor var_3331_to_fp16 = const()[name = string("op_3331_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550953856)))]; + tensor var_3332_to_fp16 = const()[name = string("op_3332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554230720)))]; + tensor linear_180_cast_fp16 = linear(bias = var_3332_to_fp16, weight = var_3331_to_fp16, x = var_3320_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor var_3335_to_fp16 = const()[name = string("op_3335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554233344)))]; + tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3335_to_fp16, x = var_3320_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor var_3339_to_fp16 = const()[name = string("op_3339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(557510208)))]; + tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560787072)))]; + tensor linear_182_cast_fp16 = linear(bias = var_3340_to_fp16, weight = var_3339_to_fp16, x = var_3320_cast_fp16)[name = string("linear_182_cast_fp16")]; + tensor var_3348 = const()[name = string("op_3348"), val = tensor([1, 1500, 20, -1])]; + tensor var_3349_cast_fp16 = reshape(shape = var_3348, x = linear_180_cast_fp16)[name = string("op_3349_cast_fp16")]; + tensor const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_123_cast_fp16 = mul(x = var_3349_cast_fp16, y = const_284_to_fp16)[name = string("q_123_cast_fp16")]; + tensor var_3355 = const()[name = string("op_3355"), val = tensor([1, 1500, 20, -1])]; + tensor var_3356_cast_fp16 = reshape(shape = var_3355, x = linear_181_cast_fp16)[name = string("op_3356_cast_fp16")]; + tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_123_cast_fp16 = mul(x = var_3356_cast_fp16, y = const_285_to_fp16)[name = string("k_123_cast_fp16")]; + tensor var_3362 = const()[name = string("op_3362"), val = tensor([1, 1500, 20, -1])]; + tensor var_3363_cast_fp16 = reshape(shape = var_3362, x = linear_182_cast_fp16)[name = string("op_3363_cast_fp16")]; + tensor var_3364 = const()[name = string("op_3364"), val = tensor([0, 2, -3, -1])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = k_123_cast_fp16)[name = string("transpose_102")]; + tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = q_123_cast_fp16)[name = string("transpose_103")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_61_cast_fp16")]; + tensor var_3368_cast_fp16 = softmax(axis = var_3304, x = qk_61_cast_fp16)[name = string("op_3368_cast_fp16")]; + bool var_3370_transpose_x_0 = const()[name = string("op_3370_transpose_x_0"), val = bool(false)]; + bool var_3370_transpose_y_0 = const()[name = string("op_3370_transpose_y_0"), val = bool(false)]; + tensor v_123_cast_fp16 = transpose(perm = var_3364, x = var_3363_cast_fp16)[name = string("transpose_101")]; + tensor var_3370_cast_fp16 = matmul(transpose_x = var_3370_transpose_x_0, transpose_y = var_3370_transpose_y_0, x = var_3368_cast_fp16, y = v_123_cast_fp16)[name = string("op_3370_cast_fp16")]; + tensor var_3371 = const()[name = string("op_3371"), val = tensor([0, 2, 1, 3])]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([1, 1500, 1280])]; + tensor var_3372_cast_fp16 = transpose(perm = var_3371, x = var_3370_cast_fp16)[name = string("transpose_100")]; + tensor x_371_cast_fp16 = reshape(shape = concat_30, x = var_3372_cast_fp16)[name = string("x_371_cast_fp16")]; + tensor var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560789696)))]; + tensor var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564066560)))]; + tensor linear_183_cast_fp16 = linear(bias = var_3377_to_fp16, weight = var_3376_to_fp16, x = x_371_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor x_373_cast_fp16 = add(x = x_367_cast_fp16, y = linear_183_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_3384_axes_0 = const()[name = string("op_3384_axes_0"), val = tensor([-1])]; + tensor blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564069184)))]; + tensor blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564071808)))]; + tensor var_3384_cast_fp16 = layer_norm(axes = var_3384_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_373_cast_fp16)[name = string("op_3384_cast_fp16")]; + tensor var_3393_to_fp16 = const()[name = string("op_3393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564074432)))]; + tensor var_3394_to_fp16 = const()[name = string("op_3394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577181696)))]; + tensor linear_184_cast_fp16 = linear(bias = var_3394_to_fp16, weight = var_3393_to_fp16, x = var_3384_cast_fp16)[name = string("linear_184_cast_fp16")]; + string x_377_mode_0 = const()[name = string("x_377_mode_0"), val = string("EXACT")]; + tensor x_377_cast_fp16 = gelu(mode = x_377_mode_0, x = linear_184_cast_fp16)[name = string("x_377_cast_fp16")]; + tensor var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577192000)))]; + tensor var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590299264)))]; + tensor linear_185_cast_fp16 = linear(bias = var_3400_to_fp16, weight = var_3399_to_fp16, x = x_377_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor x_379_cast_fp16 = add(x = x_373_cast_fp16, y = linear_185_cast_fp16)[name = string("x_379_cast_fp16")]; + int32 var_3410 = const()[name = string("op_3410"), val = int32(-1)]; + tensor var_3426_axes_0 = const()[name = string("op_3426_axes_0"), val = tensor([-1])]; + tensor blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590301888)))]; + tensor blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590304512)))]; + fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3426_cast_fp16 = layer_norm(axes = var_3426_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_379_cast_fp16)[name = string("op_3426_cast_fp16")]; + tensor var_3437_to_fp16 = const()[name = string("op_3437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590307136)))]; + tensor var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593584000)))]; + tensor linear_186_cast_fp16 = linear(bias = var_3438_to_fp16, weight = var_3437_to_fp16, x = var_3426_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593586624)))]; + tensor linear_187_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3441_to_fp16, x = var_3426_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(596863488)))]; + tensor var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600140352)))]; + tensor linear_188_cast_fp16 = linear(bias = var_3446_to_fp16, weight = var_3445_to_fp16, x = var_3426_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor var_3454 = const()[name = string("op_3454"), val = tensor([1, 1500, 20, -1])]; + tensor var_3455_cast_fp16 = reshape(shape = var_3454, x = linear_186_cast_fp16)[name = string("op_3455_cast_fp16")]; + tensor const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_3455_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")]; + tensor var_3461 = const()[name = string("op_3461"), val = tensor([1, 1500, 20, -1])]; + tensor var_3462_cast_fp16 = reshape(shape = var_3461, x = linear_187_cast_fp16)[name = string("op_3462_cast_fp16")]; + tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_3462_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")]; + tensor var_3468 = const()[name = string("op_3468"), val = tensor([1, 1500, 20, -1])]; + tensor var_3469_cast_fp16 = reshape(shape = var_3468, x = linear_188_cast_fp16)[name = string("op_3469_cast_fp16")]; + tensor var_3470 = const()[name = string("op_3470"), val = tensor([0, 2, -3, -1])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = k_cast_fp16)[name = string("transpose_98")]; + tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = q_cast_fp16)[name = string("transpose_99")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_cast_fp16")]; + tensor var_3474_cast_fp16 = softmax(axis = var_3410, x = qk_cast_fp16)[name = string("op_3474_cast_fp16")]; + bool var_3476_transpose_x_0 = const()[name = string("op_3476_transpose_x_0"), val = bool(false)]; + bool var_3476_transpose_y_0 = const()[name = string("op_3476_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_3470, x = var_3469_cast_fp16)[name = string("transpose_97")]; + tensor var_3476_cast_fp16 = matmul(transpose_x = var_3476_transpose_x_0, transpose_y = var_3476_transpose_y_0, x = var_3474_cast_fp16, y = v_cast_fp16)[name = string("op_3476_cast_fp16")]; + tensor var_3477 = const()[name = string("op_3477"), val = tensor([0, 2, 1, 3])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([1, 1500, 1280])]; + tensor var_3478_cast_fp16 = transpose(perm = var_3477, x = var_3476_cast_fp16)[name = string("transpose_96")]; + tensor x_383_cast_fp16 = reshape(shape = concat_31, x = var_3478_cast_fp16)[name = string("x_383_cast_fp16")]; + tensor var_3482_to_fp16 = const()[name = string("op_3482_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600142976)))]; + tensor var_3483_to_fp16 = const()[name = string("op_3483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603419840)))]; + tensor linear_189_cast_fp16 = linear(bias = var_3483_to_fp16, weight = var_3482_to_fp16, x = x_383_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor x_385_cast_fp16 = add(x = x_379_cast_fp16, y = linear_189_cast_fp16)[name = string("x_385_cast_fp16")]; + tensor var_3490_axes_0 = const()[name = string("op_3490_axes_0"), val = tensor([-1])]; + tensor blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603422464)))]; + tensor blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603425088)))]; + tensor var_3490_cast_fp16 = layer_norm(axes = var_3490_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_385_cast_fp16)[name = string("op_3490_cast_fp16")]; + tensor var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603427712)))]; + tensor var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616534976)))]; + tensor linear_190_cast_fp16 = linear(bias = var_3500_to_fp16, weight = var_3499_to_fp16, x = var_3490_cast_fp16)[name = string("linear_190_cast_fp16")]; + string x_389_mode_0 = const()[name = string("x_389_mode_0"), val = string("EXACT")]; + tensor x_389_cast_fp16 = gelu(mode = x_389_mode_0, x = linear_190_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616545280)))]; + tensor var_3506_to_fp16 = const()[name = string("op_3506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629652544)))]; + tensor linear_191_cast_fp16 = linear(bias = var_3506_to_fp16, weight = var_3505_to_fp16, x = x_389_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_385_cast_fp16, y = linear_191_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_3519_axes_0 = const()[name = string("op_3519_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629655168)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629657792)))]; + fp16 var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_3519_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_3510_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_3519_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin b/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..428c534992e427a9b38c763e4c3feb452ac04feb --- /dev/null +++ b/large-v2/encoder.mlmodelc/model1/weights/1-weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18deffd43b1f394f0f9d6434ef3e042c9e0424f8b590891a5cb0c21e4951163 +size 629660416 diff --git a/large-v2/model_dims.json b/large-v2/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..e22ceb62f4ffcdfe89361b7377ef1300f531463c --- /dev/null +++ b/large-v2/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 80, + "n_audio_ctx": 1500, + "n_audio_state": 1280, + "n_audio_head": 20, + "n_audio_layer": 32, + "n_vocab": 51865, + "n_text_ctx": 448, + "n_text_state": 1280, + "n_text_head": 20, + "n_text_layer": 32 +} \ No newline at end of file diff --git a/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin b/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3502c1971106c8ddba15a6d19cbe212e9040b51 --- /dev/null +++ b/large-v3/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a564dfd67cfcb3c0ee8cd9f7ef9f303fbfc561e635709bd3a46c5870571079de +size 243 diff --git a/large-v3/decoder_first.mlmodelc/coremldata.bin b/large-v3/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..78fa71791f49b098c63687ec844348e5cd25cd92 --- /dev/null +++ b/large-v3/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6214be9e110a102836fb1fdb960a2fb564e60f5d9e3d1e25a9b7f978309480e +size 453 diff --git a/large-v3/decoder_first.mlmodelc/metadata.json b/large-v3/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..62548d3742d04f712f1bad76294f859bb5029d22 --- /dev/null +++ b/large-v3/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 66, + "Shape" : 64, + "Ios18.linear" : 64, + "Identity" : 1, + "Ios18.gather" : 64, + "Ios18.concat" : 64, + "Ios18.sliceUpdate" : 66, + "Ios18.cast" : 128, + "Ios18.expandDims" : 64, + "Ios18.readState" : 66 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 1280", + "shapeRange" : "[[1, 1], [1, 1500], [1280, 1280]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 1280)", + "type" : "MultiArray", + "shape" : "[1, 1, 1280]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v3/decoder_first.mlmodelc/model.mil b/large-v3/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..5e9505ec80acb3d396de560006ff76f4da79cc6a --- /dev/null +++ b/large-v3/decoder_first.mlmodelc/model.mil @@ -0,0 +1,1851 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1280]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1280, 1280]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_131_to_fp16 = const()[name = string("op_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36700288)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39977152)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_131_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39979776)))]; + tensor var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43256640)))]; + tensor linear_1_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_138_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_138_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_138_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_138_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_138_shape_cast_fp16_to_int16 = cast(dtype = var_138_shape_cast_fp16_to_int16_dtype_0, x = var_138_shape_cast_fp16)[name = string("cast_199")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_138_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_198")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")]; + tensor var_143_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_143_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_143_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_143_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_143_shape_cast_fp16_to_uint16 = cast(dtype = var_143_shape_cast_fp16_to_uint16_dtype_0, x = var_143_shape_cast_fp16)[name = string("cast_197")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_143_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_196")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")]; + tensor var_165_to_fp16 = const()[name = string("op_165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43259264)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_165_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_169_to_fp16 = const()[name = string("op_169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46536128)))]; + tensor var_170_to_fp16 = const()[name = string("op_170_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49812992)))]; + tensor linear_3_cast_fp16 = linear(bias = var_170_to_fp16, weight = var_169_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_172_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_172_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_172_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_172_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_172_shape_cast_fp16_to_uint16 = cast(dtype = var_172_shape_cast_fp16_to_uint16_dtype_0, x = var_172_shape_cast_fp16)[name = string("cast_195")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_172_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_194")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")]; + tensor var_177_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_177_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_177_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_177_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_177_shape_cast_fp16_to_uint16 = cast(dtype = var_177_shape_cast_fp16_to_uint16_dtype_0, x = var_177_shape_cast_fp16)[name = string("cast_193")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_177_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_192")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")]; + tensor var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49815616)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_199_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53092480)))]; + tensor var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56369344)))]; + tensor linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_206_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_206_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_206_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_206_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_206_shape_cast_fp16_to_uint16 = cast(dtype = var_206_shape_cast_fp16_to_uint16_dtype_0, x = var_206_shape_cast_fp16)[name = string("cast_191")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_206_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_190")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")]; + tensor var_211_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_211_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_211_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_211_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_211_shape_cast_fp16_to_uint16 = cast(dtype = var_211_shape_cast_fp16_to_uint16_dtype_0, x = var_211_shape_cast_fp16)[name = string("cast_189")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_211_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_188")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")]; + tensor var_233_to_fp16 = const()[name = string("op_233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56371968)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_233_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_237_to_fp16 = const()[name = string("op_237_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59648832)))]; + tensor var_238_to_fp16 = const()[name = string("op_238_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62925696)))]; + tensor linear_7_cast_fp16 = linear(bias = var_238_to_fp16, weight = var_237_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_240_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_240_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_240_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_240_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_240_shape_cast_fp16_to_uint16 = cast(dtype = var_240_shape_cast_fp16_to_uint16_dtype_0, x = var_240_shape_cast_fp16)[name = string("cast_187")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_240_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_186")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")]; + tensor var_245_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_245_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_245_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_245_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_245_shape_cast_fp16_to_uint16 = cast(dtype = var_245_shape_cast_fp16_to_uint16_dtype_0, x = var_245_shape_cast_fp16)[name = string("cast_185")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_245_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_184")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")]; + tensor var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62928320)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_267_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")]; + tensor var_271_to_fp16 = const()[name = string("op_271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66205184)))]; + tensor var_272_to_fp16 = const()[name = string("op_272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69482048)))]; + tensor linear_9_cast_fp16 = linear(bias = var_272_to_fp16, weight = var_271_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")]; + tensor var_274_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_274_shape_cast_fp16")]; + int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)]; + int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)]; + bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)]; + string var_274_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_274_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)]; + tensor var_274_shape_cast_fp16_to_uint16 = cast(dtype = var_274_shape_cast_fp16_to_uint16_dtype_0, x = var_274_shape_cast_fp16)[name = string("cast_183")]; + uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_274_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")]; + string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor([0])]; + int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_182")]; + tensor expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([4, 0, 0, 0])]; + tensor concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor([0])]; + tensor concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor([0])]; + tensor concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor([0])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")]; + tensor k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")]; + tensor var_279_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_279_shape_cast_fp16")]; + int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)]; + int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)]; + bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)]; + string var_279_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_279_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)]; + tensor var_279_shape_cast_fp16_to_uint16 = cast(dtype = var_279_shape_cast_fp16_to_uint16_dtype_0, x = var_279_shape_cast_fp16)[name = string("cast_181")]; + uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_279_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")]; + string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor([0])]; + int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_180")]; + tensor expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; + tensor concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor([0])]; + tensor concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor([0])]; + tensor concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor([0])]; + int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; + bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; + tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")]; + tensor v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")]; + tensor var_301_to_fp16 = const()[name = string("op_301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69484672)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_301_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")]; + tensor var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72761536)))]; + tensor var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76038400)))]; + tensor linear_11_cast_fp16 = linear(bias = var_306_to_fp16, weight = var_305_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")]; + tensor var_308_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_308_shape_cast_fp16")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_308_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_308_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)]; + tensor var_308_shape_cast_fp16_to_uint16 = cast(dtype = var_308_shape_cast_fp16_to_uint16_dtype_0, x = var_308_shape_cast_fp16)[name = string("cast_179")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_308_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_178")]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([5, 0, 0, 0])]; + tensor concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor([0])]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")]; + tensor k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")]; + tensor var_313_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_313_shape_cast_fp16")]; + int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)]; + int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)]; + bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)]; + string var_313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)]; + tensor var_313_shape_cast_fp16_to_uint16 = cast(dtype = var_313_shape_cast_fp16_to_uint16_dtype_0, x = var_313_shape_cast_fp16)[name = string("cast_177")]; + uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_313_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")]; + string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor([0])]; + int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_176")]; + tensor expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([5, 0, 0, 0])]; + tensor concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor([0])]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")]; + tensor v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")]; + tensor var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76041024)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_335_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")]; + tensor var_339_to_fp16 = const()[name = string("op_339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79317888)))]; + tensor var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82594752)))]; + tensor linear_13_cast_fp16 = linear(bias = var_340_to_fp16, weight = var_339_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")]; + tensor var_342_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_342_shape_cast_fp16")]; + int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)]; + int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)]; + bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)]; + string var_342_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_342_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)]; + tensor var_342_shape_cast_fp16_to_uint16 = cast(dtype = var_342_shape_cast_fp16_to_uint16_dtype_0, x = var_342_shape_cast_fp16)[name = string("cast_175")]; + uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_342_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")]; + string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor([0])]; + int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_174")]; + tensor expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([6, 0, 0, 0])]; + tensor concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor([0])]; + tensor concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor([0])]; + tensor concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor([0])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")]; + tensor k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")]; + tensor var_347_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_347_shape_cast_fp16")]; + int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)]; + int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)]; + bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)]; + string var_347_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_347_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)]; + tensor var_347_shape_cast_fp16_to_uint16 = cast(dtype = var_347_shape_cast_fp16_to_uint16_dtype_0, x = var_347_shape_cast_fp16)[name = string("cast_173")]; + uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_347_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")]; + string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor([0])]; + int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_172")]; + tensor expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([6, 0, 0, 0])]; + tensor concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor([0])]; + tensor concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor([0])]; + tensor concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor([0])]; + int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)]; + bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)]; + tensor concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")]; + tensor v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")]; + tensor var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82597376)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_369_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")]; + tensor var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85874240)))]; + tensor var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89151104)))]; + tensor linear_15_cast_fp16 = linear(bias = var_374_to_fp16, weight = var_373_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")]; + tensor var_376_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_376_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_376_shape_cast_fp16_to_uint16 = cast(dtype = var_376_shape_cast_fp16_to_uint16_dtype_0, x = var_376_shape_cast_fp16)[name = string("cast_171")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_376_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_170")]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([7, 0, 0, 0])]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([0])]; + tensor concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor([0])]; + tensor concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor([0])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")]; + tensor k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")]; + tensor var_381_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_381_shape_cast_fp16")]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_381_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_381_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)]; + tensor var_381_shape_cast_fp16_to_uint16 = cast(dtype = var_381_shape_cast_fp16_to_uint16_dtype_0, x = var_381_shape_cast_fp16)[name = string("cast_169")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_381_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor([0])]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_168")]; + tensor expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([7, 0, 0, 0])]; + tensor concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor([0])]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")]; + tensor v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")]; + tensor var_403_to_fp16 = const()[name = string("op_403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89153728)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_403_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")]; + tensor var_407_to_fp16 = const()[name = string("op_407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92430592)))]; + tensor var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95707456)))]; + tensor linear_17_cast_fp16 = linear(bias = var_408_to_fp16, weight = var_407_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")]; + tensor var_410_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_410_shape_cast_fp16")]; + int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)]; + int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)]; + bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)]; + string var_410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)]; + tensor var_410_shape_cast_fp16_to_uint16 = cast(dtype = var_410_shape_cast_fp16_to_uint16_dtype_0, x = var_410_shape_cast_fp16)[name = string("cast_167")]; + uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_410_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")]; + string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor([0])]; + int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_166")]; + tensor expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([8, 0, 0, 0])]; + tensor concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor([0])]; + tensor concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor([0])]; + tensor concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor([0])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")]; + tensor k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")]; + tensor var_415_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_415_shape_cast_fp16")]; + int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)]; + int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)]; + bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)]; + string var_415_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_415_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)]; + tensor var_415_shape_cast_fp16_to_uint16 = cast(dtype = var_415_shape_cast_fp16_to_uint16_dtype_0, x = var_415_shape_cast_fp16)[name = string("cast_165")]; + uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_415_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")]; + string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor([0])]; + int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_164")]; + tensor expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([8, 0, 0, 0])]; + tensor concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor([0])]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")]; + tensor v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")]; + tensor var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95710080)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_437_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")]; + tensor var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98986944)))]; + tensor var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102263808)))]; + tensor linear_19_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")]; + tensor var_444_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_444_shape_cast_fp16")]; + int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)]; + int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)]; + bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)]; + string var_444_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_444_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)]; + tensor var_444_shape_cast_fp16_to_uint16 = cast(dtype = var_444_shape_cast_fp16_to_uint16_dtype_0, x = var_444_shape_cast_fp16)[name = string("cast_163")]; + uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_444_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")]; + string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_162")]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([9, 0, 0, 0])]; + tensor concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor([0])]; + tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; + tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")]; + tensor k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")]; + tensor var_449_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_449_shape_cast_fp16")]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_449_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_449_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)]; + tensor var_449_shape_cast_fp16_to_uint16 = cast(dtype = var_449_shape_cast_fp16_to_uint16_dtype_0, x = var_449_shape_cast_fp16)[name = string("cast_161")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_449_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor([0])]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_160")]; + tensor expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([9, 0, 0, 0])]; + tensor concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor([0])]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")]; + tensor v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")]; + tensor var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102266432)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_471_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105543296)))]; + tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108820160)))]; + tensor linear_21_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")]; + tensor var_478_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_478_shape_cast_fp16")]; + int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)]; + int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)]; + bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)]; + string var_478_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_478_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)]; + tensor var_478_shape_cast_fp16_to_uint16 = cast(dtype = var_478_shape_cast_fp16_to_uint16_dtype_0, x = var_478_shape_cast_fp16)[name = string("cast_159")]; + uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_478_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")]; + string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor([0])]; + int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_158")]; + tensor expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([10, 0, 0, 0])]; + tensor concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor([0])]; + tensor concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor([0])]; + tensor concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor([0])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")]; + tensor k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")]; + tensor var_483_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_483_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_483_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_483_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)]; + tensor var_483_shape_cast_fp16_to_uint16 = cast(dtype = var_483_shape_cast_fp16_to_uint16_dtype_0, x = var_483_shape_cast_fp16)[name = string("cast_157")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_483_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor([0])]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_156")]; + tensor expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")]; + tensor concat_68 = const()[name = string("concat_68"), val = tensor([10, 0, 0, 0])]; + tensor concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor([0])]; + tensor concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor([0])]; + tensor concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor([0])]; + int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)]; + bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)]; + tensor concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")]; + tensor v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")]; + tensor var_505_to_fp16 = const()[name = string("op_505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108822784)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_505_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")]; + tensor var_509_to_fp16 = const()[name = string("op_509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112099648)))]; + tensor var_510_to_fp16 = const()[name = string("op_510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115376512)))]; + tensor linear_23_cast_fp16 = linear(bias = var_510_to_fp16, weight = var_509_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")]; + tensor var_512_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_512_shape_cast_fp16")]; + int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)]; + int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)]; + bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)]; + string var_512_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_512_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)]; + tensor var_512_shape_cast_fp16_to_uint16 = cast(dtype = var_512_shape_cast_fp16_to_uint16_dtype_0, x = var_512_shape_cast_fp16)[name = string("cast_155")]; + uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_512_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")]; + string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_154")]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([11, 0, 0, 0])]; + tensor concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor([0])]; + tensor concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor([0])]; + tensor concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor([0])]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")]; + tensor k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")]; + tensor var_517_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_517_shape_cast_fp16")]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_517_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_517_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_517_shape_cast_fp16_to_uint16 = cast(dtype = var_517_shape_cast_fp16_to_uint16_dtype_0, x = var_517_shape_cast_fp16)[name = string("cast_153")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_517_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor([0])]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_152")]; + tensor expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")]; + tensor concat_74 = const()[name = string("concat_74"), val = tensor([11, 0, 0, 0])]; + tensor concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor([0])]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")]; + tensor v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")]; + tensor var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115379136)))]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_539_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")]; + tensor var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118656000)))]; + tensor var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121932864)))]; + tensor linear_25_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")]; + tensor var_546_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_546_shape_cast_fp16")]; + int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)]; + int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)]; + bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)]; + string var_546_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_546_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)]; + tensor var_546_shape_cast_fp16_to_uint16 = cast(dtype = var_546_shape_cast_fp16_to_uint16_dtype_0, x = var_546_shape_cast_fp16)[name = string("cast_151")]; + uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_546_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")]; + string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor([0])]; + int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_150")]; + tensor expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")]; + tensor concat_77 = const()[name = string("concat_77"), val = tensor([12, 0, 0, 0])]; + tensor concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor([0])]; + tensor concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor([0])]; + tensor concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor([0])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")]; + tensor k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")]; + tensor var_551_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_551_shape_cast_fp16")]; + int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)]; + int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)]; + bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)]; + string var_551_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_551_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)]; + tensor var_551_shape_cast_fp16_to_uint16 = cast(dtype = var_551_shape_cast_fp16_to_uint16_dtype_0, x = var_551_shape_cast_fp16)[name = string("cast_149")]; + uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_551_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")]; + string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor([0])]; + int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_148")]; + tensor expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")]; + tensor concat_80 = const()[name = string("concat_80"), val = tensor([12, 0, 0, 0])]; + tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([0])]; + tensor concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor([0])]; + tensor concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor([0])]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")]; + tensor v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")]; + tensor var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121935488)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_573_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")]; + tensor var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125212352)))]; + tensor var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128489216)))]; + tensor linear_27_cast_fp16 = linear(bias = var_578_to_fp16, weight = var_577_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")]; + tensor var_580_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_580_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_580_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_580_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_580_shape_cast_fp16_to_uint16 = cast(dtype = var_580_shape_cast_fp16_to_uint16_dtype_0, x = var_580_shape_cast_fp16)[name = string("cast_147")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_580_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_146")]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")]; + tensor concat_83 = const()[name = string("concat_83"), val = tensor([13, 0, 0, 0])]; + tensor concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor([0])]; + tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; + tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; + int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; + bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; + tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")]; + tensor k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")]; + tensor var_585_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_585_shape_cast_fp16")]; + int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)]; + int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)]; + bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)]; + string var_585_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_585_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)]; + tensor var_585_shape_cast_fp16_to_uint16 = cast(dtype = var_585_shape_cast_fp16_to_uint16_dtype_0, x = var_585_shape_cast_fp16)[name = string("cast_145")]; + uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_585_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")]; + string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor([0])]; + int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_144")]; + tensor expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([13, 0, 0, 0])]; + tensor concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor([0])]; + tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; + tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; + int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; + bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; + tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")]; + tensor v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")]; + tensor var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128491840)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_607_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")]; + tensor var_611_to_fp16 = const()[name = string("op_611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131768704)))]; + tensor var_612_to_fp16 = const()[name = string("op_612_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135045568)))]; + tensor linear_29_cast_fp16 = linear(bias = var_612_to_fp16, weight = var_611_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")]; + tensor var_614_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_614_shape_cast_fp16")]; + int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)]; + int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)]; + bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)]; + string var_614_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_614_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)]; + tensor var_614_shape_cast_fp16_to_uint16 = cast(dtype = var_614_shape_cast_fp16_to_uint16_dtype_0, x = var_614_shape_cast_fp16)[name = string("cast_143")]; + uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_614_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")]; + string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor([0])]; + int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_142")]; + tensor expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([14, 0, 0, 0])]; + tensor concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor([0])]; + tensor concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor([0])]; + tensor concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor([0])]; + int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; + bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; + tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")]; + tensor k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")]; + tensor var_619_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_619_shape_cast_fp16")]; + int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)]; + int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)]; + bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)]; + string var_619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)]; + tensor var_619_shape_cast_fp16_to_uint16 = cast(dtype = var_619_shape_cast_fp16_to_uint16_dtype_0, x = var_619_shape_cast_fp16)[name = string("cast_141")]; + uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_619_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")]; + string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor([0])]; + int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_140")]; + tensor expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")]; + tensor concat_92 = const()[name = string("concat_92"), val = tensor([14, 0, 0, 0])]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")]; + tensor v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")]; + tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135048192)))]; + tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_641_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")]; + tensor var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138325056)))]; + tensor var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141601920)))]; + tensor linear_31_cast_fp16 = linear(bias = var_646_to_fp16, weight = var_645_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")]; + tensor var_648_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_648_shape_cast_fp16")]; + int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)]; + int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)]; + bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)]; + string var_648_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_648_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)]; + tensor var_648_shape_cast_fp16_to_uint16 = cast(dtype = var_648_shape_cast_fp16_to_uint16_dtype_0, x = var_648_shape_cast_fp16)[name = string("cast_139")]; + uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_648_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")]; + string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_138")]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")]; + tensor concat_95 = const()[name = string("concat_95"), val = tensor([15, 0, 0, 0])]; + tensor concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor([0])]; + tensor concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor([0])]; + tensor concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor([0])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")]; + tensor k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = k_cache2)[name = string("coreml_update_state_98")]; + tensor var_653_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_653_shape_cast_fp16")]; + int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)]; + int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)]; + bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)]; + string var_653_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_653_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)]; + tensor var_653_shape_cast_fp16_to_uint16 = cast(dtype = var_653_shape_cast_fp16_to_uint16_dtype_0, x = var_653_shape_cast_fp16)[name = string("cast_137")]; + uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_653_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")]; + string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor([0])]; + int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_136")]; + tensor expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")]; + tensor concat_98 = const()[name = string("concat_98"), val = tensor([15, 0, 0, 0])]; + tensor concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor([0])]; + tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; + tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; + int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; + bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; + tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")]; + tensor v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = v_cache2)[name = string("coreml_update_state_99")]; + tensor var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141604544)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_675_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")]; + tensor var_679_to_fp16 = const()[name = string("op_679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144881408)))]; + tensor var_680_to_fp16 = const()[name = string("op_680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148158272)))]; + tensor linear_33_cast_fp16 = linear(bias = var_680_to_fp16, weight = var_679_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")]; + tensor var_682_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_682_shape_cast_fp16")]; + int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)]; + int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)]; + bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)]; + string var_682_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_682_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)]; + tensor var_682_shape_cast_fp16_to_uint16 = cast(dtype = var_682_shape_cast_fp16_to_uint16_dtype_0, x = var_682_shape_cast_fp16)[name = string("cast_135")]; + uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_682_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")]; + string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor([0])]; + int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_134")]; + tensor expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")]; + tensor concat_101 = const()[name = string("concat_101"), val = tensor([16, 0, 0, 0])]; + tensor concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor([0])]; + tensor concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor([0])]; + tensor concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor([0])]; + int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; + bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; + tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")]; + tensor k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_98)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = k_cache2)[name = string("coreml_update_state_100")]; + tensor var_687_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_687_shape_cast_fp16")]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_687_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_687_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)]; + tensor var_687_shape_cast_fp16_to_uint16 = cast(dtype = var_687_shape_cast_fp16_to_uint16_dtype_0, x = var_687_shape_cast_fp16)[name = string("cast_133")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_687_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor([0])]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_132")]; + tensor expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")]; + tensor concat_104 = const()[name = string("concat_104"), val = tensor([16, 0, 0, 0])]; + tensor concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor([0])]; + tensor concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor([0])]; + tensor concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor([0])]; + int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)]; + bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)]; + tensor concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")]; + tensor v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_99)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = v_cache2)[name = string("coreml_update_state_101")]; + tensor var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148160896)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_709_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")]; + tensor var_713_to_fp16 = const()[name = string("op_713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151437760)))]; + tensor var_714_to_fp16 = const()[name = string("op_714_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154714624)))]; + tensor linear_35_cast_fp16 = linear(bias = var_714_to_fp16, weight = var_713_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")]; + tensor var_716_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_716_shape_cast_fp16")]; + int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)]; + int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)]; + bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)]; + string var_716_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_716_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)]; + tensor var_716_shape_cast_fp16_to_uint16 = cast(dtype = var_716_shape_cast_fp16_to_uint16_dtype_0, x = var_716_shape_cast_fp16)[name = string("cast_131")]; + uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_716_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")]; + string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_130")]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([17, 0, 0, 0])]; + tensor concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor([0])]; + tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; + tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; + int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; + bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")]; + tensor k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_100)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = k_cache2)[name = string("coreml_update_state_102")]; + tensor var_721_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_721_shape_cast_fp16")]; + int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)]; + int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)]; + bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)]; + string var_721_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_721_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)]; + tensor var_721_shape_cast_fp16_to_uint16 = cast(dtype = var_721_shape_cast_fp16_to_uint16_dtype_0, x = var_721_shape_cast_fp16)[name = string("cast_129")]; + uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_721_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")]; + string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor([0])]; + int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_128")]; + tensor expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")]; + tensor concat_110 = const()[name = string("concat_110"), val = tensor([17, 0, 0, 0])]; + tensor concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor([0])]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")]; + tensor v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_101)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = v_cache2)[name = string("coreml_update_state_103")]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154717248)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_743_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")]; + tensor var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157994112)))]; + tensor var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161270976)))]; + tensor linear_37_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")]; + tensor var_750_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_750_shape_cast_fp16")]; + int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)]; + int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)]; + bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)]; + string var_750_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_750_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)]; + tensor var_750_shape_cast_fp16_to_uint16 = cast(dtype = var_750_shape_cast_fp16_to_uint16_dtype_0, x = var_750_shape_cast_fp16)[name = string("cast_127")]; + uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_750_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")]; + string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor([0])]; + int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_126")]; + tensor expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")]; + tensor concat_113 = const()[name = string("concat_113"), val = tensor([18, 0, 0, 0])]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([0])]; + tensor concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor([0])]; + tensor concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor([0])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")]; + tensor k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_102)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = k_cache2)[name = string("coreml_update_state_104")]; + tensor var_755_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_755_shape_cast_fp16")]; + int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; + int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; + bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; + string var_755_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_755_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)]; + tensor var_755_shape_cast_fp16_to_uint16 = cast(dtype = var_755_shape_cast_fp16_to_uint16_dtype_0, x = var_755_shape_cast_fp16)[name = string("cast_125")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_755_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; + string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor([0])]; + int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_124")]; + tensor expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")]; + tensor concat_116 = const()[name = string("concat_116"), val = tensor([18, 0, 0, 0])]; + tensor concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor([0])]; + tensor concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor([0])]; + tensor concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor([0])]; + int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)]; + bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)]; + tensor concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")]; + tensor v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_103)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = v_cache2)[name = string("coreml_update_state_105")]; + tensor var_777_to_fp16 = const()[name = string("op_777_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161273600)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_777_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")]; + tensor var_781_to_fp16 = const()[name = string("op_781_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164550464)))]; + tensor var_782_to_fp16 = const()[name = string("op_782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167827328)))]; + tensor linear_39_cast_fp16 = linear(bias = var_782_to_fp16, weight = var_781_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")]; + tensor var_784_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_784_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_784_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_784_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_784_shape_cast_fp16_to_uint16 = cast(dtype = var_784_shape_cast_fp16_to_uint16_dtype_0, x = var_784_shape_cast_fp16)[name = string("cast_123")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_784_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_122")]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")]; + tensor concat_119 = const()[name = string("concat_119"), val = tensor([19, 0, 0, 0])]; + tensor concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor([0])]; + tensor concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor([0])]; + tensor concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor([0])]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")]; + tensor k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_104)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = k_cache2)[name = string("coreml_update_state_106")]; + tensor var_789_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_789_shape_cast_fp16")]; + int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; + int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; + bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; + string var_789_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_789_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)]; + tensor var_789_shape_cast_fp16_to_uint16 = cast(dtype = var_789_shape_cast_fp16_to_uint16_dtype_0, x = var_789_shape_cast_fp16)[name = string("cast_121")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_789_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; + string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor([0])]; + int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_120")]; + tensor expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")]; + tensor concat_122 = const()[name = string("concat_122"), val = tensor([19, 0, 0, 0])]; + tensor concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor([0])]; + tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; + tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; + int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; + bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; + tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")]; + tensor v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_105)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = v_cache2)[name = string("coreml_update_state_107")]; + tensor var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167829952)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_811_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")]; + tensor var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171106816)))]; + tensor var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174383680)))]; + tensor linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")]; + tensor var_818_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_818_shape_cast_fp16")]; + int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)]; + int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)]; + bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)]; + string var_818_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_818_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)]; + tensor var_818_shape_cast_fp16_to_uint16 = cast(dtype = var_818_shape_cast_fp16_to_uint16_dtype_0, x = var_818_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_818_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")]; + string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor([0])]; + int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_118")]; + tensor expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([20, 0, 0, 0])]; + tensor concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor([0])]; + tensor concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor([0])]; + tensor concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor([0])]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")]; + tensor k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_106)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = k_cache2)[name = string("coreml_update_state_108")]; + tensor var_823_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_823_shape_cast_fp16")]; + int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; + int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; + bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; + string var_823_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_823_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; + tensor var_823_shape_cast_fp16_to_uint16 = cast(dtype = var_823_shape_cast_fp16_to_uint16_dtype_0, x = var_823_shape_cast_fp16)[name = string("cast_117")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_823_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; + string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor([0])]; + int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_116")]; + tensor expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([20, 0, 0, 0])]; + tensor concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor([0])]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")]; + tensor v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_107)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = v_cache2)[name = string("coreml_update_state_109")]; + tensor var_845_to_fp16 = const()[name = string("op_845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174386304)))]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_845_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")]; + tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177663168)))]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180940032)))]; + tensor linear_43_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")]; + tensor var_852_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_852_shape_cast_fp16")]; + int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)]; + int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)]; + bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)]; + string var_852_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_852_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)]; + tensor var_852_shape_cast_fp16_to_uint16 = cast(dtype = var_852_shape_cast_fp16_to_uint16_dtype_0, x = var_852_shape_cast_fp16)[name = string("cast_115")]; + uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_852_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")]; + string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_114")]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([21, 0, 0, 0])]; + tensor concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor([0])]; + tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; + tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")]; + tensor k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_108)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = k_cache2)[name = string("coreml_update_state_110")]; + tensor var_857_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_857_shape_cast_fp16")]; + int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)]; + int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)]; + bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)]; + string var_857_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_857_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)]; + tensor var_857_shape_cast_fp16_to_uint16 = cast(dtype = var_857_shape_cast_fp16_to_uint16_dtype_0, x = var_857_shape_cast_fp16)[name = string("cast_113")]; + uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_857_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")]; + string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor([0])]; + int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_112")]; + tensor expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")]; + tensor concat_134 = const()[name = string("concat_134"), val = tensor([21, 0, 0, 0])]; + tensor concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor([0])]; + tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; + tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; + int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; + bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; + tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")]; + tensor v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_109)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = v_cache2)[name = string("coreml_update_state_111")]; + tensor var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180942656)))]; + tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_879_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")]; + tensor var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184219520)))]; + tensor var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187496384)))]; + tensor linear_45_cast_fp16 = linear(bias = var_884_to_fp16, weight = var_883_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")]; + tensor var_886_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_886_shape_cast_fp16")]; + int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)]; + int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)]; + bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)]; + string var_886_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_886_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)]; + tensor var_886_shape_cast_fp16_to_uint16 = cast(dtype = var_886_shape_cast_fp16_to_uint16_dtype_0, x = var_886_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_886_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")]; + string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor([0])]; + int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_110")]; + tensor expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")]; + tensor concat_137 = const()[name = string("concat_137"), val = tensor([22, 0, 0, 0])]; + tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([0])]; + tensor concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor([0])]; + tensor concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor([0])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")]; + tensor k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_110)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = k_cache2)[name = string("coreml_update_state_112")]; + tensor var_891_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_891_shape_cast_fp16")]; + int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)]; + int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)]; + bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)]; + string var_891_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_891_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)]; + tensor var_891_shape_cast_fp16_to_uint16 = cast(dtype = var_891_shape_cast_fp16_to_uint16_dtype_0, x = var_891_shape_cast_fp16)[name = string("cast_109")]; + uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_891_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")]; + string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor([0])]; + int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_108")]; + tensor expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")]; + tensor concat_140 = const()[name = string("concat_140"), val = tensor([22, 0, 0, 0])]; + tensor concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor([0])]; + tensor concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor([0])]; + tensor concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor([0])]; + int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)]; + bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)]; + tensor concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")]; + tensor v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_111)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = v_cache2)[name = string("coreml_update_state_113")]; + tensor var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187499008)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_913_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")]; + tensor var_917_to_fp16 = const()[name = string("op_917_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190775872)))]; + tensor var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194052736)))]; + tensor linear_47_cast_fp16 = linear(bias = var_918_to_fp16, weight = var_917_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")]; + tensor var_920_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_920_shape_cast_fp16")]; + int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)]; + int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)]; + bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)]; + string var_920_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_920_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)]; + tensor var_920_shape_cast_fp16_to_uint16 = cast(dtype = var_920_shape_cast_fp16_to_uint16_dtype_0, x = var_920_shape_cast_fp16)[name = string("cast_107")]; + uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_920_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")]; + string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_106")]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([23, 0, 0, 0])]; + tensor concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor([0])]; + tensor concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor([0])]; + tensor concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor([0])]; + int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)]; + bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)]; + tensor concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")]; + tensor k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_112)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = k_cache2)[name = string("coreml_update_state_114")]; + tensor var_925_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_925_shape_cast_fp16")]; + int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)]; + int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)]; + bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)]; + string var_925_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_925_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)]; + tensor var_925_shape_cast_fp16_to_uint16 = cast(dtype = var_925_shape_cast_fp16_to_uint16_dtype_0, x = var_925_shape_cast_fp16)[name = string("cast_105")]; + uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_925_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")]; + string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor([0])]; + int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_104")]; + tensor expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")]; + tensor concat_146 = const()[name = string("concat_146"), val = tensor([23, 0, 0, 0])]; + tensor concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor([0])]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")]; + tensor v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_113)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = v_cache2)[name = string("coreml_update_state_115")]; + tensor var_947_to_fp16 = const()[name = string("op_947_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194055360)))]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_947_to_fp16, x = audio_data)[name = string("linear_48_cast_fp16")]; + tensor var_951_to_fp16 = const()[name = string("op_951_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197332224)))]; + tensor var_952_to_fp16 = const()[name = string("op_952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200609088)))]; + tensor linear_49_cast_fp16 = linear(bias = var_952_to_fp16, weight = var_951_to_fp16, x = audio_data)[name = string("linear_49_cast_fp16")]; + tensor var_954_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_954_shape_cast_fp16")]; + int32 gather_48_axis_0 = const()[name = string("gather_48_axis_0"), val = int32(0)]; + int32 gather_48_batch_dims_0 = const()[name = string("gather_48_batch_dims_0"), val = int32(0)]; + bool gather_48_validate_indices_0 = const()[name = string("gather_48_validate_indices_0"), val = bool(false)]; + string var_954_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_954_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_48_to_uint16 = const()[name = string("select_48_to_uint16"), val = uint16(1)]; + tensor var_954_shape_cast_fp16_to_uint16 = cast(dtype = var_954_shape_cast_fp16_to_uint16_dtype_0, x = var_954_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_48_cast_uint16 = gather(axis = gather_48_axis_0, batch_dims = gather_48_batch_dims_0, indices = select_48_to_uint16, validate_indices = gather_48_validate_indices_0, x = var_954_shape_cast_fp16_to_uint16)[name = string("gather_48_cast_uint16")]; + string gather_48_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_48_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_203_axes_0 = const()[name = string("expand_dims_203_axes_0"), val = tensor([0])]; + int32 gather_48_cast_uint16_to_int32 = cast(dtype = gather_48_cast_uint16_to_int32_dtype_0, x = gather_48_cast_uint16)[name = string("cast_102")]; + tensor expand_dims_203 = expand_dims(axes = expand_dims_203_axes_0, x = gather_48_cast_uint16_to_int32)[name = string("expand_dims_203")]; + tensor concat_149 = const()[name = string("concat_149"), val = tensor([24, 0, 0, 0])]; + tensor concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = tensor([0])]; + tensor concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = tensor([0])]; + tensor concat_150_values3_0 = const()[name = string("concat_150_values3_0"), val = tensor([0])]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, expand_dims_203, concat_150_values3_0))[name = string("concat_150")]; + tensor k_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_149, begin_mask = k_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_150, end_mask = k_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_25_stride_0, update = linear_48_cast_fp16, x = coreml_update_state_114)[name = string("k_cache2_internal_tensor_assign_25_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_25_cast_fp16, input = k_cache2)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = k_cache2)[name = string("coreml_update_state_116")]; + tensor var_959_shape_cast_fp16 = shape(x = linear_49_cast_fp16)[name = string("op_959_shape_cast_fp16")]; + int32 gather_49_axis_0 = const()[name = string("gather_49_axis_0"), val = int32(0)]; + int32 gather_49_batch_dims_0 = const()[name = string("gather_49_batch_dims_0"), val = int32(0)]; + bool gather_49_validate_indices_0 = const()[name = string("gather_49_validate_indices_0"), val = bool(false)]; + string var_959_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_959_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_49_to_uint16 = const()[name = string("select_49_to_uint16"), val = uint16(1)]; + tensor var_959_shape_cast_fp16_to_uint16 = cast(dtype = var_959_shape_cast_fp16_to_uint16_dtype_0, x = var_959_shape_cast_fp16)[name = string("cast_101")]; + uint16 gather_49_cast_uint16 = gather(axis = gather_49_axis_0, batch_dims = gather_49_batch_dims_0, indices = select_49_to_uint16, validate_indices = gather_49_validate_indices_0, x = var_959_shape_cast_fp16_to_uint16)[name = string("gather_49_cast_uint16")]; + string gather_49_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_49_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_207_axes_0 = const()[name = string("expand_dims_207_axes_0"), val = tensor([0])]; + int32 gather_49_cast_uint16_to_int32 = cast(dtype = gather_49_cast_uint16_to_int32_dtype_0, x = gather_49_cast_uint16)[name = string("cast_100")]; + tensor expand_dims_207 = expand_dims(axes = expand_dims_207_axes_0, x = gather_49_cast_uint16_to_int32)[name = string("expand_dims_207")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([24, 0, 0, 0])]; + tensor concat_153_values0_0 = const()[name = string("concat_153_values0_0"), val = tensor([0])]; + tensor concat_153_values1_0 = const()[name = string("concat_153_values1_0"), val = tensor([0])]; + tensor concat_153_values3_0 = const()[name = string("concat_153_values3_0"), val = tensor([0])]; + int32 concat_153_axis_0 = const()[name = string("concat_153_axis_0"), val = int32(0)]; + bool concat_153_interleave_0 = const()[name = string("concat_153_interleave_0"), val = bool(false)]; + tensor concat_153 = concat(axis = concat_153_axis_0, interleave = concat_153_interleave_0, values = (concat_153_values0_0, concat_153_values1_0, expand_dims_207, concat_153_values3_0))[name = string("concat_153")]; + tensor v_cache2_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_cache2_internal_tensor_assign_25_begin_mask_0, end = concat_153, end_mask = v_cache2_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_25_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_115)[name = string("v_cache2_internal_tensor_assign_25_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_25_cast_fp16, input = v_cache2)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = v_cache2)[name = string("coreml_update_state_117")]; + tensor var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200611712)))]; + tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_981_to_fp16, x = audio_data)[name = string("linear_50_cast_fp16")]; + tensor var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203888576)))]; + tensor var_986_to_fp16 = const()[name = string("op_986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207165440)))]; + tensor linear_51_cast_fp16 = linear(bias = var_986_to_fp16, weight = var_985_to_fp16, x = audio_data)[name = string("linear_51_cast_fp16")]; + tensor var_988_shape_cast_fp16 = shape(x = linear_50_cast_fp16)[name = string("op_988_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_988_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_988_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_988_shape_cast_fp16_to_uint16 = cast(dtype = var_988_shape_cast_fp16_to_uint16_dtype_0, x = var_988_shape_cast_fp16)[name = string("cast_99")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_988_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_98")]; + tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = gather_50_cast_uint16_to_int32)[name = string("expand_dims_211")]; + tensor concat_155 = const()[name = string("concat_155"), val = tensor([25, 0, 0, 0])]; + tensor concat_156_values0_0 = const()[name = string("concat_156_values0_0"), val = tensor([0])]; + tensor concat_156_values1_0 = const()[name = string("concat_156_values1_0"), val = tensor([0])]; + tensor concat_156_values3_0 = const()[name = string("concat_156_values3_0"), val = tensor([0])]; + int32 concat_156_axis_0 = const()[name = string("concat_156_axis_0"), val = int32(0)]; + bool concat_156_interleave_0 = const()[name = string("concat_156_interleave_0"), val = bool(false)]; + tensor concat_156 = concat(axis = concat_156_axis_0, interleave = concat_156_interleave_0, values = (concat_156_values0_0, concat_156_values1_0, expand_dims_211, concat_156_values3_0))[name = string("concat_156")]; + tensor k_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_155, begin_mask = k_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_156, end_mask = k_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_26_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_116)[name = string("k_cache2_internal_tensor_assign_26_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_26_cast_fp16, input = k_cache2)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = k_cache2)[name = string("coreml_update_state_118")]; + tensor var_993_shape_cast_fp16 = shape(x = linear_51_cast_fp16)[name = string("op_993_shape_cast_fp16")]; + int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; + int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; + bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; + string var_993_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_993_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(1)]; + tensor var_993_shape_cast_fp16_to_uint16 = cast(dtype = var_993_shape_cast_fp16_to_uint16_dtype_0, x = var_993_shape_cast_fp16)[name = string("cast_97")]; + uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_993_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; + string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_215_axes_0 = const()[name = string("expand_dims_215_axes_0"), val = tensor([0])]; + int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_96")]; + tensor expand_dims_215 = expand_dims(axes = expand_dims_215_axes_0, x = gather_51_cast_uint16_to_int32)[name = string("expand_dims_215")]; + tensor concat_158 = const()[name = string("concat_158"), val = tensor([25, 0, 0, 0])]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_215, concat_159_values3_0))[name = string("concat_159")]; + tensor v_cache2_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache2_internal_tensor_assign_26_begin_mask_0, end = concat_159, end_mask = v_cache2_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_26_stride_0, update = linear_51_cast_fp16, x = coreml_update_state_117)[name = string("v_cache2_internal_tensor_assign_26_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_26_cast_fp16, input = v_cache2)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = v_cache2)[name = string("coreml_update_state_119")]; + tensor var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207168064)))]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1015_to_fp16, x = audio_data)[name = string("linear_52_cast_fp16")]; + tensor var_1019_to_fp16 = const()[name = string("op_1019_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210444928)))]; + tensor var_1020_to_fp16 = const()[name = string("op_1020_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213721792)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1020_to_fp16, weight = var_1019_to_fp16, x = audio_data)[name = string("linear_53_cast_fp16")]; + tensor var_1022_shape_cast_fp16 = shape(x = linear_52_cast_fp16)[name = string("op_1022_shape_cast_fp16")]; + int32 gather_52_axis_0 = const()[name = string("gather_52_axis_0"), val = int32(0)]; + int32 gather_52_batch_dims_0 = const()[name = string("gather_52_batch_dims_0"), val = int32(0)]; + bool gather_52_validate_indices_0 = const()[name = string("gather_52_validate_indices_0"), val = bool(false)]; + string var_1022_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1022_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_52_to_uint16 = const()[name = string("select_52_to_uint16"), val = uint16(1)]; + tensor var_1022_shape_cast_fp16_to_uint16 = cast(dtype = var_1022_shape_cast_fp16_to_uint16_dtype_0, x = var_1022_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_52_cast_uint16 = gather(axis = gather_52_axis_0, batch_dims = gather_52_batch_dims_0, indices = select_52_to_uint16, validate_indices = gather_52_validate_indices_0, x = var_1022_shape_cast_fp16_to_uint16)[name = string("gather_52_cast_uint16")]; + string gather_52_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_52_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_219_axes_0 = const()[name = string("expand_dims_219_axes_0"), val = tensor([0])]; + int32 gather_52_cast_uint16_to_int32 = cast(dtype = gather_52_cast_uint16_to_int32_dtype_0, x = gather_52_cast_uint16)[name = string("cast_94")]; + tensor expand_dims_219 = expand_dims(axes = expand_dims_219_axes_0, x = gather_52_cast_uint16_to_int32)[name = string("expand_dims_219")]; + tensor concat_161 = const()[name = string("concat_161"), val = tensor([26, 0, 0, 0])]; + tensor concat_162_values0_0 = const()[name = string("concat_162_values0_0"), val = tensor([0])]; + tensor concat_162_values1_0 = const()[name = string("concat_162_values1_0"), val = tensor([0])]; + tensor concat_162_values3_0 = const()[name = string("concat_162_values3_0"), val = tensor([0])]; + int32 concat_162_axis_0 = const()[name = string("concat_162_axis_0"), val = int32(0)]; + bool concat_162_interleave_0 = const()[name = string("concat_162_interleave_0"), val = bool(false)]; + tensor concat_162 = concat(axis = concat_162_axis_0, interleave = concat_162_interleave_0, values = (concat_162_values0_0, concat_162_values1_0, expand_dims_219, concat_162_values3_0))[name = string("concat_162")]; + tensor k_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_161, begin_mask = k_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_162, end_mask = k_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_27_stride_0, update = linear_52_cast_fp16, x = coreml_update_state_118)[name = string("k_cache2_internal_tensor_assign_27_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_27_cast_fp16, input = k_cache2)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_120 = read_state(input = k_cache2)[name = string("coreml_update_state_120")]; + tensor var_1027_shape_cast_fp16 = shape(x = linear_53_cast_fp16)[name = string("op_1027_shape_cast_fp16")]; + int32 gather_53_axis_0 = const()[name = string("gather_53_axis_0"), val = int32(0)]; + int32 gather_53_batch_dims_0 = const()[name = string("gather_53_batch_dims_0"), val = int32(0)]; + bool gather_53_validate_indices_0 = const()[name = string("gather_53_validate_indices_0"), val = bool(false)]; + string var_1027_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1027_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_53_to_uint16 = const()[name = string("select_53_to_uint16"), val = uint16(1)]; + tensor var_1027_shape_cast_fp16_to_uint16 = cast(dtype = var_1027_shape_cast_fp16_to_uint16_dtype_0, x = var_1027_shape_cast_fp16)[name = string("cast_93")]; + uint16 gather_53_cast_uint16 = gather(axis = gather_53_axis_0, batch_dims = gather_53_batch_dims_0, indices = select_53_to_uint16, validate_indices = gather_53_validate_indices_0, x = var_1027_shape_cast_fp16_to_uint16)[name = string("gather_53_cast_uint16")]; + string gather_53_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_53_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_223_axes_0 = const()[name = string("expand_dims_223_axes_0"), val = tensor([0])]; + int32 gather_53_cast_uint16_to_int32 = cast(dtype = gather_53_cast_uint16_to_int32_dtype_0, x = gather_53_cast_uint16)[name = string("cast_92")]; + tensor expand_dims_223 = expand_dims(axes = expand_dims_223_axes_0, x = gather_53_cast_uint16_to_int32)[name = string("expand_dims_223")]; + tensor concat_164 = const()[name = string("concat_164"), val = tensor([26, 0, 0, 0])]; + tensor concat_165_values0_0 = const()[name = string("concat_165_values0_0"), val = tensor([0])]; + tensor concat_165_values1_0 = const()[name = string("concat_165_values1_0"), val = tensor([0])]; + tensor concat_165_values3_0 = const()[name = string("concat_165_values3_0"), val = tensor([0])]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (concat_165_values0_0, concat_165_values1_0, expand_dims_223, concat_165_values3_0))[name = string("concat_165")]; + tensor v_cache2_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_164, begin_mask = v_cache2_internal_tensor_assign_27_begin_mask_0, end = concat_165, end_mask = v_cache2_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_27_stride_0, update = linear_53_cast_fp16, x = coreml_update_state_119)[name = string("v_cache2_internal_tensor_assign_27_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_27_cast_fp16, input = v_cache2)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_121 = read_state(input = v_cache2)[name = string("coreml_update_state_121")]; + tensor var_1049_to_fp16 = const()[name = string("op_1049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213724416)))]; + tensor linear_54_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1049_to_fp16, x = audio_data)[name = string("linear_54_cast_fp16")]; + tensor var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217001280)))]; + tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220278144)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = audio_data)[name = string("linear_55_cast_fp16")]; + tensor var_1056_shape_cast_fp16 = shape(x = linear_54_cast_fp16)[name = string("op_1056_shape_cast_fp16")]; + int32 gather_54_axis_0 = const()[name = string("gather_54_axis_0"), val = int32(0)]; + int32 gather_54_batch_dims_0 = const()[name = string("gather_54_batch_dims_0"), val = int32(0)]; + bool gather_54_validate_indices_0 = const()[name = string("gather_54_validate_indices_0"), val = bool(false)]; + string var_1056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_54_to_uint16 = const()[name = string("select_54_to_uint16"), val = uint16(1)]; + tensor var_1056_shape_cast_fp16_to_uint16 = cast(dtype = var_1056_shape_cast_fp16_to_uint16_dtype_0, x = var_1056_shape_cast_fp16)[name = string("cast_91")]; + uint16 gather_54_cast_uint16 = gather(axis = gather_54_axis_0, batch_dims = gather_54_batch_dims_0, indices = select_54_to_uint16, validate_indices = gather_54_validate_indices_0, x = var_1056_shape_cast_fp16_to_uint16)[name = string("gather_54_cast_uint16")]; + string gather_54_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_54_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; + int32 gather_54_cast_uint16_to_int32 = cast(dtype = gather_54_cast_uint16_to_int32_dtype_0, x = gather_54_cast_uint16)[name = string("cast_90")]; + tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = gather_54_cast_uint16_to_int32)[name = string("expand_dims_227")]; + tensor concat_167 = const()[name = string("concat_167"), val = tensor([27, 0, 0, 0])]; + tensor concat_168_values0_0 = const()[name = string("concat_168_values0_0"), val = tensor([0])]; + tensor concat_168_values1_0 = const()[name = string("concat_168_values1_0"), val = tensor([0])]; + tensor concat_168_values3_0 = const()[name = string("concat_168_values3_0"), val = tensor([0])]; + int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; + bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; + tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (concat_168_values0_0, concat_168_values1_0, expand_dims_227, concat_168_values3_0))[name = string("concat_168")]; + tensor k_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_167, begin_mask = k_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_168, end_mask = k_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_28_stride_0, update = linear_54_cast_fp16, x = coreml_update_state_120)[name = string("k_cache2_internal_tensor_assign_28_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_28_cast_fp16, input = k_cache2)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_122 = read_state(input = k_cache2)[name = string("coreml_update_state_122")]; + tensor var_1061_shape_cast_fp16 = shape(x = linear_55_cast_fp16)[name = string("op_1061_shape_cast_fp16")]; + int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; + int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; + bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; + string var_1061_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1061_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(1)]; + tensor var_1061_shape_cast_fp16_to_uint16 = cast(dtype = var_1061_shape_cast_fp16_to_uint16_dtype_0, x = var_1061_shape_cast_fp16)[name = string("cast_89")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_1061_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; + string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_231_axes_0 = const()[name = string("expand_dims_231_axes_0"), val = tensor([0])]; + int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_88")]; + tensor expand_dims_231 = expand_dims(axes = expand_dims_231_axes_0, x = gather_55_cast_uint16_to_int32)[name = string("expand_dims_231")]; + tensor concat_170 = const()[name = string("concat_170"), val = tensor([27, 0, 0, 0])]; + tensor concat_171_values0_0 = const()[name = string("concat_171_values0_0"), val = tensor([0])]; + tensor concat_171_values1_0 = const()[name = string("concat_171_values1_0"), val = tensor([0])]; + tensor concat_171_values3_0 = const()[name = string("concat_171_values3_0"), val = tensor([0])]; + int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; + bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; + tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (concat_171_values0_0, concat_171_values1_0, expand_dims_231, concat_171_values3_0))[name = string("concat_171")]; + tensor v_cache2_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_170, begin_mask = v_cache2_internal_tensor_assign_28_begin_mask_0, end = concat_171, end_mask = v_cache2_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_28_stride_0, update = linear_55_cast_fp16, x = coreml_update_state_121)[name = string("v_cache2_internal_tensor_assign_28_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_28_cast_fp16, input = v_cache2)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_123 = read_state(input = v_cache2)[name = string("coreml_update_state_123")]; + tensor var_1083_to_fp16 = const()[name = string("op_1083_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220280768)))]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1083_to_fp16, x = audio_data)[name = string("linear_56_cast_fp16")]; + tensor var_1087_to_fp16 = const()[name = string("op_1087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223557632)))]; + tensor var_1088_to_fp16 = const()[name = string("op_1088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226834496)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1088_to_fp16, weight = var_1087_to_fp16, x = audio_data)[name = string("linear_57_cast_fp16")]; + tensor var_1090_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1090_shape_cast_fp16")]; + int32 gather_56_axis_0 = const()[name = string("gather_56_axis_0"), val = int32(0)]; + int32 gather_56_batch_dims_0 = const()[name = string("gather_56_batch_dims_0"), val = int32(0)]; + bool gather_56_validate_indices_0 = const()[name = string("gather_56_validate_indices_0"), val = bool(false)]; + string var_1090_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1090_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_56_to_uint16 = const()[name = string("select_56_to_uint16"), val = uint16(1)]; + tensor var_1090_shape_cast_fp16_to_uint16 = cast(dtype = var_1090_shape_cast_fp16_to_uint16_dtype_0, x = var_1090_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_56_cast_uint16 = gather(axis = gather_56_axis_0, batch_dims = gather_56_batch_dims_0, indices = select_56_to_uint16, validate_indices = gather_56_validate_indices_0, x = var_1090_shape_cast_fp16_to_uint16)[name = string("gather_56_cast_uint16")]; + string gather_56_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_56_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_235_axes_0 = const()[name = string("expand_dims_235_axes_0"), val = tensor([0])]; + int32 gather_56_cast_uint16_to_int32 = cast(dtype = gather_56_cast_uint16_to_int32_dtype_0, x = gather_56_cast_uint16)[name = string("cast_86")]; + tensor expand_dims_235 = expand_dims(axes = expand_dims_235_axes_0, x = gather_56_cast_uint16_to_int32)[name = string("expand_dims_235")]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([28, 0, 0, 0])]; + tensor concat_174_values0_0 = const()[name = string("concat_174_values0_0"), val = tensor([0])]; + tensor concat_174_values1_0 = const()[name = string("concat_174_values1_0"), val = tensor([0])]; + tensor concat_174_values3_0 = const()[name = string("concat_174_values3_0"), val = tensor([0])]; + int32 concat_174_axis_0 = const()[name = string("concat_174_axis_0"), val = int32(0)]; + bool concat_174_interleave_0 = const()[name = string("concat_174_interleave_0"), val = bool(false)]; + tensor concat_174 = concat(axis = concat_174_axis_0, interleave = concat_174_interleave_0, values = (concat_174_values0_0, concat_174_values1_0, expand_dims_235, concat_174_values3_0))[name = string("concat_174")]; + tensor k_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_173, begin_mask = k_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_174, end_mask = k_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_29_stride_0, update = linear_56_cast_fp16, x = coreml_update_state_122)[name = string("k_cache2_internal_tensor_assign_29_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_29_cast_fp16, input = k_cache2)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_124 = read_state(input = k_cache2)[name = string("coreml_update_state_124")]; + tensor var_1095_shape_cast_fp16 = shape(x = linear_57_cast_fp16)[name = string("op_1095_shape_cast_fp16")]; + int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; + int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; + bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; + string var_1095_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1095_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(1)]; + tensor var_1095_shape_cast_fp16_to_uint16 = cast(dtype = var_1095_shape_cast_fp16_to_uint16_dtype_0, x = var_1095_shape_cast_fp16)[name = string("cast_85")]; + uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_1095_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; + string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_239_axes_0 = const()[name = string("expand_dims_239_axes_0"), val = tensor([0])]; + int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_84")]; + tensor expand_dims_239 = expand_dims(axes = expand_dims_239_axes_0, x = gather_57_cast_uint16_to_int32)[name = string("expand_dims_239")]; + tensor concat_176 = const()[name = string("concat_176"), val = tensor([28, 0, 0, 0])]; + tensor concat_177_values0_0 = const()[name = string("concat_177_values0_0"), val = tensor([0])]; + tensor concat_177_values1_0 = const()[name = string("concat_177_values1_0"), val = tensor([0])]; + tensor concat_177_values3_0 = const()[name = string("concat_177_values3_0"), val = tensor([0])]; + int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)]; + bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)]; + tensor concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (concat_177_values0_0, concat_177_values1_0, expand_dims_239, concat_177_values3_0))[name = string("concat_177")]; + tensor v_cache2_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_176, begin_mask = v_cache2_internal_tensor_assign_29_begin_mask_0, end = concat_177, end_mask = v_cache2_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_29_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_123)[name = string("v_cache2_internal_tensor_assign_29_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_29_cast_fp16, input = v_cache2)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_125 = read_state(input = v_cache2)[name = string("coreml_update_state_125")]; + tensor var_1117_to_fp16 = const()[name = string("op_1117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226837120)))]; + tensor linear_58_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1117_to_fp16, x = audio_data)[name = string("linear_58_cast_fp16")]; + tensor var_1121_to_fp16 = const()[name = string("op_1121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230113984)))]; + tensor var_1122_to_fp16 = const()[name = string("op_1122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233390848)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1122_to_fp16, weight = var_1121_to_fp16, x = audio_data)[name = string("linear_59_cast_fp16")]; + tensor var_1124_shape_cast_fp16 = shape(x = linear_58_cast_fp16)[name = string("op_1124_shape_cast_fp16")]; + int32 gather_58_axis_0 = const()[name = string("gather_58_axis_0"), val = int32(0)]; + int32 gather_58_batch_dims_0 = const()[name = string("gather_58_batch_dims_0"), val = int32(0)]; + bool gather_58_validate_indices_0 = const()[name = string("gather_58_validate_indices_0"), val = bool(false)]; + string var_1124_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1124_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_58_to_uint16 = const()[name = string("select_58_to_uint16"), val = uint16(1)]; + tensor var_1124_shape_cast_fp16_to_uint16 = cast(dtype = var_1124_shape_cast_fp16_to_uint16_dtype_0, x = var_1124_shape_cast_fp16)[name = string("cast_83")]; + uint16 gather_58_cast_uint16 = gather(axis = gather_58_axis_0, batch_dims = gather_58_batch_dims_0, indices = select_58_to_uint16, validate_indices = gather_58_validate_indices_0, x = var_1124_shape_cast_fp16_to_uint16)[name = string("gather_58_cast_uint16")]; + string gather_58_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_58_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; + int32 gather_58_cast_uint16_to_int32 = cast(dtype = gather_58_cast_uint16_to_int32_dtype_0, x = gather_58_cast_uint16)[name = string("cast_82")]; + tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = gather_58_cast_uint16_to_int32)[name = string("expand_dims_243")]; + tensor concat_179 = const()[name = string("concat_179"), val = tensor([29, 0, 0, 0])]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([0])]; + tensor concat_180_values1_0 = const()[name = string("concat_180_values1_0"), val = tensor([0])]; + tensor concat_180_values3_0 = const()[name = string("concat_180_values3_0"), val = tensor([0])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, concat_180_values1_0, expand_dims_243, concat_180_values3_0))[name = string("concat_180")]; + tensor k_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_179, begin_mask = k_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_180, end_mask = k_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_30_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_124)[name = string("k_cache2_internal_tensor_assign_30_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_30_cast_fp16, input = k_cache2)[name = string("coreml_update_state_126_write_state")]; + tensor coreml_update_state_126 = read_state(input = k_cache2)[name = string("coreml_update_state_126")]; + tensor var_1129_shape_cast_fp16 = shape(x = linear_59_cast_fp16)[name = string("op_1129_shape_cast_fp16")]; + int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; + int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; + bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; + string var_1129_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1129_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; + tensor var_1129_shape_cast_fp16_to_uint16 = cast(dtype = var_1129_shape_cast_fp16_to_uint16_dtype_0, x = var_1129_shape_cast_fp16)[name = string("cast_81")]; + uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_1129_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; + string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_247_axes_0 = const()[name = string("expand_dims_247_axes_0"), val = tensor([0])]; + int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_80")]; + tensor expand_dims_247 = expand_dims(axes = expand_dims_247_axes_0, x = gather_59_cast_uint16_to_int32)[name = string("expand_dims_247")]; + tensor concat_182 = const()[name = string("concat_182"), val = tensor([29, 0, 0, 0])]; + tensor concat_183_values0_0 = const()[name = string("concat_183_values0_0"), val = tensor([0])]; + tensor concat_183_values1_0 = const()[name = string("concat_183_values1_0"), val = tensor([0])]; + tensor concat_183_values3_0 = const()[name = string("concat_183_values3_0"), val = tensor([0])]; + int32 concat_183_axis_0 = const()[name = string("concat_183_axis_0"), val = int32(0)]; + bool concat_183_interleave_0 = const()[name = string("concat_183_interleave_0"), val = bool(false)]; + tensor concat_183 = concat(axis = concat_183_axis_0, interleave = concat_183_interleave_0, values = (concat_183_values0_0, concat_183_values1_0, expand_dims_247, concat_183_values3_0))[name = string("concat_183")]; + tensor v_cache2_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_182, begin_mask = v_cache2_internal_tensor_assign_30_begin_mask_0, end = concat_183, end_mask = v_cache2_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_30_stride_0, update = linear_59_cast_fp16, x = coreml_update_state_125)[name = string("v_cache2_internal_tensor_assign_30_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_30_cast_fp16, input = v_cache2)[name = string("coreml_update_state_127_write_state")]; + tensor coreml_update_state_127 = read_state(input = v_cache2)[name = string("coreml_update_state_127")]; + tensor var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233393472)))]; + tensor linear_60_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1151_to_fp16, x = audio_data)[name = string("linear_60_cast_fp16")]; + tensor var_1155_to_fp16 = const()[name = string("op_1155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236670336)))]; + tensor var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239947200)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1156_to_fp16, weight = var_1155_to_fp16, x = audio_data)[name = string("linear_61_cast_fp16")]; + tensor var_1158_shape_cast_fp16 = shape(x = linear_60_cast_fp16)[name = string("op_1158_shape_cast_fp16")]; + int32 gather_60_axis_0 = const()[name = string("gather_60_axis_0"), val = int32(0)]; + int32 gather_60_batch_dims_0 = const()[name = string("gather_60_batch_dims_0"), val = int32(0)]; + bool gather_60_validate_indices_0 = const()[name = string("gather_60_validate_indices_0"), val = bool(false)]; + string var_1158_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1158_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_60_to_uint16 = const()[name = string("select_60_to_uint16"), val = uint16(1)]; + tensor var_1158_shape_cast_fp16_to_uint16 = cast(dtype = var_1158_shape_cast_fp16_to_uint16_dtype_0, x = var_1158_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_60_cast_uint16 = gather(axis = gather_60_axis_0, batch_dims = gather_60_batch_dims_0, indices = select_60_to_uint16, validate_indices = gather_60_validate_indices_0, x = var_1158_shape_cast_fp16_to_uint16)[name = string("gather_60_cast_uint16")]; + string gather_60_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_60_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_251_axes_0 = const()[name = string("expand_dims_251_axes_0"), val = tensor([0])]; + int32 gather_60_cast_uint16_to_int32 = cast(dtype = gather_60_cast_uint16_to_int32_dtype_0, x = gather_60_cast_uint16)[name = string("cast_78")]; + tensor expand_dims_251 = expand_dims(axes = expand_dims_251_axes_0, x = gather_60_cast_uint16_to_int32)[name = string("expand_dims_251")]; + tensor concat_185 = const()[name = string("concat_185"), val = tensor([30, 0, 0, 0])]; + tensor concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = tensor([0])]; + tensor concat_186_values1_0 = const()[name = string("concat_186_values1_0"), val = tensor([0])]; + tensor concat_186_values3_0 = const()[name = string("concat_186_values3_0"), val = tensor([0])]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, concat_186_values1_0, expand_dims_251, concat_186_values3_0))[name = string("concat_186")]; + tensor k_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_185, begin_mask = k_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_186, end_mask = k_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_31_stride_0, update = linear_60_cast_fp16, x = coreml_update_state_126)[name = string("k_cache2_internal_tensor_assign_31_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_31_cast_fp16, input = k_cache2)[name = string("coreml_update_state_128_write_state")]; + tensor coreml_update_state_128 = read_state(input = k_cache2)[name = string("coreml_update_state_128")]; + tensor var_1163_shape_cast_fp16 = shape(x = linear_61_cast_fp16)[name = string("op_1163_shape_cast_fp16")]; + int32 gather_61_axis_0 = const()[name = string("gather_61_axis_0"), val = int32(0)]; + int32 gather_61_batch_dims_0 = const()[name = string("gather_61_batch_dims_0"), val = int32(0)]; + bool gather_61_validate_indices_0 = const()[name = string("gather_61_validate_indices_0"), val = bool(false)]; + string var_1163_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1163_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_61_to_uint16 = const()[name = string("select_61_to_uint16"), val = uint16(1)]; + tensor var_1163_shape_cast_fp16_to_uint16 = cast(dtype = var_1163_shape_cast_fp16_to_uint16_dtype_0, x = var_1163_shape_cast_fp16)[name = string("cast_77")]; + uint16 gather_61_cast_uint16 = gather(axis = gather_61_axis_0, batch_dims = gather_61_batch_dims_0, indices = select_61_to_uint16, validate_indices = gather_61_validate_indices_0, x = var_1163_shape_cast_fp16_to_uint16)[name = string("gather_61_cast_uint16")]; + string gather_61_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_61_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_255_axes_0 = const()[name = string("expand_dims_255_axes_0"), val = tensor([0])]; + int32 gather_61_cast_uint16_to_int32 = cast(dtype = gather_61_cast_uint16_to_int32_dtype_0, x = gather_61_cast_uint16)[name = string("cast_76")]; + tensor expand_dims_255 = expand_dims(axes = expand_dims_255_axes_0, x = gather_61_cast_uint16_to_int32)[name = string("expand_dims_255")]; + tensor concat_188 = const()[name = string("concat_188"), val = tensor([30, 0, 0, 0])]; + tensor concat_189_values0_0 = const()[name = string("concat_189_values0_0"), val = tensor([0])]; + tensor concat_189_values1_0 = const()[name = string("concat_189_values1_0"), val = tensor([0])]; + tensor concat_189_values3_0 = const()[name = string("concat_189_values3_0"), val = tensor([0])]; + int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; + bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; + tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (concat_189_values0_0, concat_189_values1_0, expand_dims_255, concat_189_values3_0))[name = string("concat_189")]; + tensor v_cache2_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_188, begin_mask = v_cache2_internal_tensor_assign_31_begin_mask_0, end = concat_189, end_mask = v_cache2_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_31_stride_0, update = linear_61_cast_fp16, x = coreml_update_state_127)[name = string("v_cache2_internal_tensor_assign_31_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_31_cast_fp16, input = v_cache2)[name = string("coreml_update_state_129_write_state")]; + tensor coreml_update_state_129 = read_state(input = v_cache2)[name = string("coreml_update_state_129")]; + tensor var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239949824)))]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_1185_to_fp16, x = audio_data)[name = string("linear_62_cast_fp16")]; + tensor var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243226688)))]; + tensor var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246503552)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1190_to_fp16, weight = var_1189_to_fp16, x = audio_data)[name = string("linear_63_cast_fp16")]; + tensor var_1192_shape_cast_fp16 = shape(x = linear_62_cast_fp16)[name = string("op_1192_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1192_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1192_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1192_shape_cast_fp16_to_uint16 = cast(dtype = var_1192_shape_cast_fp16_to_uint16_dtype_0, x = var_1192_shape_cast_fp16)[name = string("cast_75")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1192_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_74")]; + tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = gather_62_cast_uint16_to_int32)[name = string("expand_dims_259")]; + tensor concat_191 = const()[name = string("concat_191"), val = tensor([31, 0, 0, 0])]; + tensor concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = tensor([0])]; + tensor concat_192_values1_0 = const()[name = string("concat_192_values1_0"), val = tensor([0])]; + tensor concat_192_values3_0 = const()[name = string("concat_192_values3_0"), val = tensor([0])]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, concat_192_values1_0, expand_dims_259, concat_192_values3_0))[name = string("concat_192")]; + tensor k_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_191, begin_mask = k_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_192, end_mask = k_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_32_stride_0, update = linear_62_cast_fp16, x = coreml_update_state_128)[name = string("k_cache2_internal_tensor_assign_32_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_32_cast_fp16, input = k_cache2)[name = string("coreml_update_state_130_write_state")]; + tensor var_1197_shape_cast_fp16 = shape(x = linear_63_cast_fp16)[name = string("op_1197_shape_cast_fp16")]; + int32 gather_63_axis_0 = const()[name = string("gather_63_axis_0"), val = int32(0)]; + int32 gather_63_batch_dims_0 = const()[name = string("gather_63_batch_dims_0"), val = int32(0)]; + bool gather_63_validate_indices_0 = const()[name = string("gather_63_validate_indices_0"), val = bool(false)]; + string var_1197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_63_to_uint16 = const()[name = string("select_63_to_uint16"), val = uint16(1)]; + tensor var_1197_shape_cast_fp16_to_uint16 = cast(dtype = var_1197_shape_cast_fp16_to_uint16_dtype_0, x = var_1197_shape_cast_fp16)[name = string("cast_73")]; + uint16 gather_63_cast_uint16 = gather(axis = gather_63_axis_0, batch_dims = gather_63_batch_dims_0, indices = select_63_to_uint16, validate_indices = gather_63_validate_indices_0, x = var_1197_shape_cast_fp16_to_uint16)[name = string("gather_63_cast_uint16")]; + string gather_63_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_63_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_263_axes_0 = const()[name = string("expand_dims_263_axes_0"), val = tensor([0])]; + int32 gather_63_cast_uint16_to_int32 = cast(dtype = gather_63_cast_uint16_to_int32_dtype_0, x = gather_63_cast_uint16)[name = string("cast_72")]; + tensor expand_dims_263 = expand_dims(axes = expand_dims_263_axes_0, x = gather_63_cast_uint16_to_int32)[name = string("expand_dims_263")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([31, 0, 0, 0])]; + tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([0])]; + tensor concat_195_values1_0 = const()[name = string("concat_195_values1_0"), val = tensor([0])]; + tensor concat_195_values3_0 = const()[name = string("concat_195_values3_0"), val = tensor([0])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, concat_195_values1_0, expand_dims_263, concat_195_values3_0))[name = string("concat_195")]; + tensor v_cache2_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_194, begin_mask = v_cache2_internal_tensor_assign_32_begin_mask_0, end = concat_195, end_mask = v_cache2_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_32_stride_0, update = linear_63_cast_fp16, x = coreml_update_state_129)[name = string("v_cache2_internal_tensor_assign_32_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_32_cast_fp16, input = v_cache2)[name = string("coreml_update_state_131_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/large-v3/decoder_first.mlmodelc/weights/weight.bin b/large-v3/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb46e5fd8b19039b4494818092abc9e777f54b54 --- /dev/null +++ b/large-v3/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346ed969f2a1ddb144c4add194c7b2a9a7d7b4a2e536d1e4a2afbfe5a4f62818 +size 246506176 diff --git a/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin b/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..259e441785b4f9b0bd793e92eb8ef632348844f2 --- /dev/null +++ b/large-v3/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4f1e5461c9555f7720e35c3cf749dee1a467277881458a87b7f2d35016831c +size 243 diff --git a/large-v3/decoder_second.mlmodelc/coremldata.bin b/large-v3/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..06d384eab53d9890f25a9f07a76b3771dcd2b170 --- /dev/null +++ b/large-v3/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5601244df54c60a16c26b761742867d06c6ef440ab8b0776ce5f6d1b4875c95 +size 487 diff --git a/large-v3/decoder_second.mlmodelc/metadata.json b/large-v3/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..722e4912d37eb8c71f0d55eb4ea48b33db80210d --- /dev/null +++ b/large-v3/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 257, + "Ios18.readState" : 66, + "Ios18.expandDims" : 33, + "Ios18.sub" : 1, + "Ios18.matmul" : 128, + "Ios18.gelu" : 32, + "Ios18.gather" : 35, + "Ios18.concat" : 162, + "Shape" : 34, + "Ios18.add" : 161, + "Ios18.sliceUpdate" : 128, + "Ios18.sliceByIndex" : 257, + "Ios18.layerNorm" : 97, + "Ios18.cast" : 68, + "Ios18.transpose" : 256, + "Ios18.writeState" : 64, + "Ios18.reshape" : 256, + "Ios18.softmax" : 64, + "Ios18.mul" : 128 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 448 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 448, 1280]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 32 × 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[32, 1, 1500, 1280]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v3/decoder_second.mlmodelc/model.mil b/large-v3/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..c7037f845098a20f94eecbcc56cf6eafa648d786 --- /dev/null +++ b/large-v3/decoder_second.mlmodelc/model.mil @@ -0,0 +1,6298 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_78_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_78_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_78_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_78_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_78_shape_cast_fp16_to_int16 = cast(dtype = var_78_shape_cast_fp16_to_int16_dtype_0, x = var_78_shape_cast_fp16)[name = string("cast_394")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_78_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_82_shape = shape(x = token_data)[name = string("op_82_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_82_shape_to_uint16_dtype_0 = const()[name = string("op_82_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_82_shape_to_uint16 = cast(dtype = var_82_shape_to_uint16_dtype_0, x = var_82_shape)[name = string("cast_392")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_82_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_391")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_393")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_154_axis_0 = const()[name = string("op_154_axis_0"), val = int32(0)]; + int32 var_154_batch_dims_0 = const()[name = string("op_154_batch_dims_0"), val = int32(0)]; + bool var_154_validate_indices_0 = const()[name = string("op_154_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_154_cast_fp16 = gather(axis = var_154_axis_0, batch_dims = var_154_batch_dims_0, indices = token_data, validate_indices = var_154_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_154_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1280)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_157_end_mask_0 = const()[name = string("op_157_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))]; + tensor var_157_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_157_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_157_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_154_cast_fp16, y = var_157_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 1280])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 1280])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_180 = const()[name = string("op_180"), val = int32(-1)]; + tensor var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133926656)))]; + fp16 var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_198_cast_fp16 = layer_norm(axes = var_198_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_198_cast_fp16")]; + tensor var_209_to_fp16 = const()[name = string("op_209_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133929280)))]; + tensor var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137206144)))]; + tensor linear_0_cast_fp16 = linear(bias = var_210_to_fp16, weight = var_209_to_fp16, x = var_198_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137208768)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140485632)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_213_to_fp16, x = var_198_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140488256)))]; + tensor var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143765120)))]; + tensor linear_2_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_198_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_220_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_220_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_220_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_220_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_220_shape_cast_fp16_to_uint16 = cast(dtype = var_220_shape_cast_fp16_to_uint16_dtype_0, x = var_220_shape_cast_fp16)[name = string("cast_390")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_220_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_389")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1280)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor([0, 0, 0])]; + tensor var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor([true, false, true])]; + tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_10, end_mask = var_236_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_236_cast_fp16")]; + tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 0])]; + tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([true, false, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = concat_10, end_mask = var_239_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 20, 64])]; + tensor var_249_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")]; + tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_249_cast_fp16, y = const_160_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 20, 64])]; + tensor var_256_cast_fp16 = reshape(shape = concat_13x, x = var_236_cast_fp16)[name = string("op_256_cast_fp16")]; + tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_256_cast_fp16, y = const_161_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 20, 64])]; + tensor var_263_cast_fp16 = reshape(shape = concat_14x, x = var_239_cast_fp16)[name = string("op_263_cast_fp16")]; + tensor var_264 = const()[name = string("op_264"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_258 = transpose(perm = transpose_258_perm_0, x = k_5_cast_fp16)[name = string("transpose_638")]; + tensor transpose_257 = transpose(perm = transpose_257_perm_0, x = q_3_cast_fp16)[name = string("transpose_639")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_267_begin_0 = const()[name = string("op_267_begin_0"), val = tensor([0, 0])]; + tensor var_267_end_mask_0 = const()[name = string("op_267_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143767744)))]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = concat_15, end_mask = var_267_end_mask_0, x = mask_to_fp16)[name = string("op_267_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_268_begin_0 = const()[name = string("op_268_begin_0"), val = tensor([0, 0])]; + tensor var_268_end_mask_0 = const()[name = string("op_268_end_mask_0"), val = tensor([true, false])]; + tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = concat_16, end_mask = var_268_end_mask_0, x = var_267_cast_fp16)[name = string("op_268_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_268_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_271_cast_fp16 = softmax(axis = var_180, x = qk_3_cast_fp16)[name = string("op_271_cast_fp16")]; + bool var_273_transpose_x_0 = const()[name = string("op_273_transpose_x_0"), val = bool(false)]; + bool var_273_transpose_y_0 = const()[name = string("op_273_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_264, x = var_263_cast_fp16)[name = string("transpose_640")]; + tensor var_273_cast_fp16 = matmul(transpose_x = var_273_transpose_x_0, transpose_y = var_273_transpose_y_0, x = var_271_cast_fp16, y = v_5_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor var_274 = const()[name = string("op_274"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 1280])]; + tensor var_275_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_637")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_275_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144169216)))]; + tensor var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147446080)))]; + tensor linear_3_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_287_axes_0 = const()[name = string("op_287_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147448704)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147451328)))]; + tensor var_287_cast_fp16 = layer_norm(axes = var_287_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_287_cast_fp16")]; + tensor var_296_to_fp16 = const()[name = string("op_296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147453952)))]; + tensor var_297_to_fp16 = const()[name = string("op_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150730816)))]; + tensor linear_4_cast_fp16 = linear(bias = var_297_to_fp16, weight = var_296_to_fp16, x = var_287_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150733440)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 20, 64])]; + tensor var_317_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_317_cast_fp16")]; + tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_317_cast_fp16, y = const_162_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_323 = const()[name = string("op_323"), val = tensor([1, 1500, 20, -1])]; + tensor var_324_cast_fp16 = reshape(shape = var_323, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_324_cast_fp16")]; + tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_324_cast_fp16, y = const_163_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_330 = const()[name = string("op_330"), val = tensor([1, 1500, 20, -1])]; + tensor var_331_cast_fp16 = reshape(shape = var_330, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_331_cast_fp16")]; + tensor var_332 = const()[name = string("op_332"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_260 = transpose(perm = transpose_260_perm_0, x = k_9_cast_fp16)[name = string("transpose_634")]; + tensor transpose_259 = transpose(perm = transpose_259_perm_0, x = q_7_cast_fp16)[name = string("transpose_635")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_5_cast_fp16")]; + tensor var_336_cast_fp16 = softmax(axis = var_180, x = qk_5_cast_fp16)[name = string("op_336_cast_fp16")]; + bool var_338_transpose_x_0 = const()[name = string("op_338_transpose_x_0"), val = bool(false)]; + bool var_338_transpose_y_0 = const()[name = string("op_338_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_332, x = var_331_cast_fp16)[name = string("transpose_636")]; + tensor var_338_cast_fp16 = matmul(transpose_x = var_338_transpose_x_0, transpose_y = var_338_transpose_y_0, x = var_336_cast_fp16, y = v_9_cast_fp16)[name = string("op_338_cast_fp16")]; + tensor var_339 = const()[name = string("op_339"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 1280])]; + tensor var_340_cast_fp16 = transpose(perm = var_339, x = var_338_cast_fp16)[name = string("transpose_633")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_340_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154573504)))]; + tensor var_345_to_fp16 = const()[name = string("op_345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157850368)))]; + tensor linear_5_cast_fp16 = linear(bias = var_345_to_fp16, weight = var_344_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157852992)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157855616)))]; + tensor var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_186_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_352_cast_fp16")]; + tensor var_361_to_fp16 = const()[name = string("op_361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157858240)))]; + tensor var_362_to_fp16 = const()[name = string("op_362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170965504)))]; + tensor linear_6_cast_fp16 = linear(bias = var_362_to_fp16, weight = var_361_to_fp16, x = var_352_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170975808)))]; + tensor var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184083072)))]; + tensor linear_7_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 1280])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 1280])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_391 = const()[name = string("op_391"), val = int32(-1)]; + tensor var_409_axes_0 = const()[name = string("op_409_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184085696)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184088320)))]; + fp16 var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_409_cast_fp16 = layer_norm(axes = var_409_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_409_cast_fp16")]; + tensor var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184090944)))]; + tensor var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187367808)))]; + tensor linear_8_cast_fp16 = linear(bias = var_421_to_fp16, weight = var_420_to_fp16, x = var_409_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187370432)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_424_to_fp16, x = var_409_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190647296)))]; + tensor var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193924160)))]; + tensor linear_10_cast_fp16 = linear(bias = var_429_to_fp16, weight = var_428_to_fp16, x = var_409_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_431_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_431_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_431_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_431_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_431_shape_cast_fp16_to_uint16 = cast(dtype = var_431_shape_cast_fp16_to_uint16_dtype_0, x = var_431_shape_cast_fp16)[name = string("cast_388")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_431_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_387")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1280)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor([0, 0, 0])]; + tensor var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor([true, false, true])]; + tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_32, end_mask = var_447_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor var_450_begin_0 = const()[name = string("op_450_begin_0"), val = tensor([0, 0, 0])]; + tensor var_450_end_mask_0 = const()[name = string("op_450_end_mask_0"), val = tensor([true, false, true])]; + tensor var_450_cast_fp16 = slice_by_index(begin = var_450_begin_0, end = concat_32, end_mask = var_450_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_450_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 20, 64])]; + tensor var_460_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_460_cast_fp16")]; + tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_460_cast_fp16, y = const_164_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 20, 64])]; + tensor var_467_cast_fp16 = reshape(shape = concat_35x, x = var_447_cast_fp16)[name = string("op_467_cast_fp16")]; + tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_467_cast_fp16, y = const_165_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 20, 64])]; + tensor var_474_cast_fp16 = reshape(shape = concat_36x, x = var_450_cast_fp16)[name = string("op_474_cast_fp16")]; + tensor var_475 = const()[name = string("op_475"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_262 = transpose(perm = transpose_262_perm_0, x = k_15_cast_fp16)[name = string("transpose_630")]; + tensor transpose_261 = transpose(perm = transpose_261_perm_0, x = q_11_cast_fp16)[name = string("transpose_631")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_478_begin_0 = const()[name = string("op_478_begin_0"), val = tensor([0, 0])]; + tensor var_478_end_mask_0 = const()[name = string("op_478_end_mask_0"), val = tensor([false, true])]; + tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = concat_37, end_mask = var_478_end_mask_0, x = mask_to_fp16)[name = string("op_478_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_479_begin_0 = const()[name = string("op_479_begin_0"), val = tensor([0, 0])]; + tensor var_479_end_mask_0 = const()[name = string("op_479_end_mask_0"), val = tensor([true, false])]; + tensor var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = concat_38, end_mask = var_479_end_mask_0, x = var_478_cast_fp16)[name = string("op_479_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_479_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_482_cast_fp16 = softmax(axis = var_391, x = qk_9_cast_fp16)[name = string("op_482_cast_fp16")]; + bool var_484_transpose_x_0 = const()[name = string("op_484_transpose_x_0"), val = bool(false)]; + bool var_484_transpose_y_0 = const()[name = string("op_484_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_475, x = var_474_cast_fp16)[name = string("transpose_632")]; + tensor var_484_cast_fp16 = matmul(transpose_x = var_484_transpose_x_0, transpose_y = var_484_transpose_y_0, x = var_482_cast_fp16, y = v_15_cast_fp16)[name = string("op_484_cast_fp16")]; + tensor var_485 = const()[name = string("op_485"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 1280])]; + tensor var_486_cast_fp16 = transpose(perm = var_485, x = var_484_cast_fp16)[name = string("transpose_629")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_486_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_490_to_fp16 = const()[name = string("op_490_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193926784)))]; + tensor var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197203648)))]; + tensor linear_11_cast_fp16 = linear(bias = var_491_to_fp16, weight = var_490_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197206272)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197208896)))]; + tensor var_498_cast_fp16 = layer_norm(axes = var_498_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_498_cast_fp16")]; + tensor var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197211520)))]; + tensor var_508_to_fp16 = const()[name = string("op_508_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200488384)))]; + tensor linear_12_cast_fp16 = linear(bias = var_508_to_fp16, weight = var_507_to_fp16, x = var_498_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 20, 64])]; + tensor var_528_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_528_cast_fp16")]; + tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_528_cast_fp16, y = const_166_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_534 = const()[name = string("op_534"), val = tensor([1, 1500, 20, -1])]; + tensor var_535_cast_fp16 = reshape(shape = var_534, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_535_cast_fp16")]; + tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_535_cast_fp16, y = const_167_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_541 = const()[name = string("op_541"), val = tensor([1, 1500, 20, -1])]; + tensor var_542_cast_fp16 = reshape(shape = var_541, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_542_cast_fp16")]; + tensor var_543 = const()[name = string("op_543"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_264 = transpose(perm = transpose_264_perm_0, x = k_19_cast_fp16)[name = string("transpose_626")]; + tensor transpose_263 = transpose(perm = transpose_263_perm_0, x = q_15_cast_fp16)[name = string("transpose_627")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_11_cast_fp16")]; + tensor var_547_cast_fp16 = softmax(axis = var_391, x = qk_11_cast_fp16)[name = string("op_547_cast_fp16")]; + bool var_549_transpose_x_0 = const()[name = string("op_549_transpose_x_0"), val = bool(false)]; + bool var_549_transpose_y_0 = const()[name = string("op_549_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_543, x = var_542_cast_fp16)[name = string("transpose_628")]; + tensor var_549_cast_fp16 = matmul(transpose_x = var_549_transpose_x_0, transpose_y = var_549_transpose_y_0, x = var_547_cast_fp16, y = v_19_cast_fp16)[name = string("op_549_cast_fp16")]; + tensor var_550 = const()[name = string("op_550"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 1280])]; + tensor var_551_cast_fp16 = transpose(perm = var_550, x = var_549_cast_fp16)[name = string("transpose_625")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_551_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200491008)))]; + tensor var_556_to_fp16 = const()[name = string("op_556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203767872)))]; + tensor linear_13_cast_fp16 = linear(bias = var_556_to_fp16, weight = var_555_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203770496)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203773120)))]; + tensor var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_397_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = string("op_572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203775744)))]; + tensor var_573_to_fp16 = const()[name = string("op_573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216883008)))]; + tensor linear_14_cast_fp16 = linear(bias = var_573_to_fp16, weight = var_572_to_fp16, x = var_563_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_578_to_fp16 = const()[name = string("op_578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216893312)))]; + tensor var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230000576)))]; + tensor linear_15_cast_fp16 = linear(bias = var_579_to_fp16, weight = var_578_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 1280])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 1280])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_602 = const()[name = string("op_602"), val = int32(-1)]; + tensor var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230003200)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230005824)))]; + fp16 var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_620_cast_fp16")]; + tensor var_631_to_fp16 = const()[name = string("op_631_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230008448)))]; + tensor var_632_to_fp16 = const()[name = string("op_632_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233285312)))]; + tensor linear_16_cast_fp16 = linear(bias = var_632_to_fp16, weight = var_631_to_fp16, x = var_620_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_635_to_fp16 = const()[name = string("op_635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233287936)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_635_to_fp16, x = var_620_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236564800)))]; + tensor var_640_to_fp16 = const()[name = string("op_640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239841664)))]; + tensor linear_18_cast_fp16 = linear(bias = var_640_to_fp16, weight = var_639_to_fp16, x = var_620_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_642_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_642_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_642_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_642_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_642_shape_cast_fp16_to_uint16 = cast(dtype = var_642_shape_cast_fp16_to_uint16_dtype_0, x = var_642_shape_cast_fp16)[name = string("cast_386")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_642_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_385")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1280)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor([0, 0, 0])]; + tensor var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor([true, false, true])]; + tensor var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_54, end_mask = var_658_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_658_cast_fp16")]; + tensor var_661_begin_0 = const()[name = string("op_661_begin_0"), val = tensor([0, 0, 0])]; + tensor var_661_end_mask_0 = const()[name = string("op_661_end_mask_0"), val = tensor([true, false, true])]; + tensor var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = concat_54, end_mask = var_661_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_661_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 20, 64])]; + tensor var_671_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_671_cast_fp16")]; + tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_671_cast_fp16, y = const_168_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 20, 64])]; + tensor var_678_cast_fp16 = reshape(shape = concat_57x, x = var_658_cast_fp16)[name = string("op_678_cast_fp16")]; + tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_678_cast_fp16, y = const_169_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 20, 64])]; + tensor var_685_cast_fp16 = reshape(shape = concat_58x, x = var_661_cast_fp16)[name = string("op_685_cast_fp16")]; + tensor var_686 = const()[name = string("op_686"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_266 = transpose(perm = transpose_266_perm_0, x = k_25_cast_fp16)[name = string("transpose_622")]; + tensor transpose_265 = transpose(perm = transpose_265_perm_0, x = q_19_cast_fp16)[name = string("transpose_623")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_689_begin_0 = const()[name = string("op_689_begin_0"), val = tensor([0, 0])]; + tensor var_689_end_mask_0 = const()[name = string("op_689_end_mask_0"), val = tensor([false, true])]; + tensor var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = concat_59, end_mask = var_689_end_mask_0, x = mask_to_fp16)[name = string("op_689_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_690_begin_0 = const()[name = string("op_690_begin_0"), val = tensor([0, 0])]; + tensor var_690_end_mask_0 = const()[name = string("op_690_end_mask_0"), val = tensor([true, false])]; + tensor var_690_cast_fp16 = slice_by_index(begin = var_690_begin_0, end = concat_60, end_mask = var_690_end_mask_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_690_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_693_cast_fp16 = softmax(axis = var_602, x = qk_15_cast_fp16)[name = string("op_693_cast_fp16")]; + bool var_695_transpose_x_0 = const()[name = string("op_695_transpose_x_0"), val = bool(false)]; + bool var_695_transpose_y_0 = const()[name = string("op_695_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_686, x = var_685_cast_fp16)[name = string("transpose_624")]; + tensor var_695_cast_fp16 = matmul(transpose_x = var_695_transpose_x_0, transpose_y = var_695_transpose_y_0, x = var_693_cast_fp16, y = v_25_cast_fp16)[name = string("op_695_cast_fp16")]; + tensor var_696 = const()[name = string("op_696"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 1280])]; + tensor var_697_cast_fp16 = transpose(perm = var_696, x = var_695_cast_fp16)[name = string("transpose_621")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_697_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239844288)))]; + tensor var_702_to_fp16 = const()[name = string("op_702_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243121152)))]; + tensor linear_19_cast_fp16 = linear(bias = var_702_to_fp16, weight = var_701_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243123776)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243126400)))]; + tensor var_709_cast_fp16 = layer_norm(axes = var_709_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_709_cast_fp16")]; + tensor var_718_to_fp16 = const()[name = string("op_718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243129024)))]; + tensor var_719_to_fp16 = const()[name = string("op_719_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246405888)))]; + tensor linear_20_cast_fp16 = linear(bias = var_719_to_fp16, weight = var_718_to_fp16, x = var_709_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 20, 64])]; + tensor var_739_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_739_cast_fp16")]; + tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_739_cast_fp16, y = const_170_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_745 = const()[name = string("op_745"), val = tensor([1, 1500, 20, -1])]; + tensor var_746_cast_fp16 = reshape(shape = var_745, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_746_cast_fp16")]; + tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_746_cast_fp16, y = const_171_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_752 = const()[name = string("op_752"), val = tensor([1, 1500, 20, -1])]; + tensor var_753_cast_fp16 = reshape(shape = var_752, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_753_cast_fp16")]; + tensor var_754 = const()[name = string("op_754"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_268 = transpose(perm = transpose_268_perm_0, x = k_29_cast_fp16)[name = string("transpose_618")]; + tensor transpose_267 = transpose(perm = transpose_267_perm_0, x = q_23_cast_fp16)[name = string("transpose_619")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_17_cast_fp16")]; + tensor var_758_cast_fp16 = softmax(axis = var_602, x = qk_17_cast_fp16)[name = string("op_758_cast_fp16")]; + bool var_760_transpose_x_0 = const()[name = string("op_760_transpose_x_0"), val = bool(false)]; + bool var_760_transpose_y_0 = const()[name = string("op_760_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_754, x = var_753_cast_fp16)[name = string("transpose_620")]; + tensor var_760_cast_fp16 = matmul(transpose_x = var_760_transpose_x_0, transpose_y = var_760_transpose_y_0, x = var_758_cast_fp16, y = v_29_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor var_761 = const()[name = string("op_761"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 1280])]; + tensor var_762_cast_fp16 = transpose(perm = var_761, x = var_760_cast_fp16)[name = string("transpose_617")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_762_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246408512)))]; + tensor var_767_to_fp16 = const()[name = string("op_767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249685376)))]; + tensor linear_21_cast_fp16 = linear(bias = var_767_to_fp16, weight = var_766_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_774_axes_0 = const()[name = string("op_774_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249688000)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249690624)))]; + tensor var_774_cast_fp16 = layer_norm(axes = var_774_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_608_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_774_cast_fp16")]; + tensor var_783_to_fp16 = const()[name = string("op_783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249693248)))]; + tensor var_784_to_fp16 = const()[name = string("op_784_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262800512)))]; + tensor linear_22_cast_fp16 = linear(bias = var_784_to_fp16, weight = var_783_to_fp16, x = var_774_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_789_to_fp16 = const()[name = string("op_789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262810816)))]; + tensor var_790_to_fp16 = const()[name = string("op_790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275918080)))]; + tensor linear_23_cast_fp16 = linear(bias = var_790_to_fp16, weight = var_789_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 1280])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; + tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; + tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 1280])]; + tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; + int32 var_813 = const()[name = string("op_813"), val = int32(-1)]; + tensor var_831_axes_0 = const()[name = string("op_831_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275920704)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275923328)))]; + fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_831_cast_fp16 = layer_norm(axes = var_831_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_831_cast_fp16")]; + tensor var_842_to_fp16 = const()[name = string("op_842_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275925952)))]; + tensor var_843_to_fp16 = const()[name = string("op_843_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279202816)))]; + tensor linear_24_cast_fp16 = linear(bias = var_843_to_fp16, weight = var_842_to_fp16, x = var_831_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_846_to_fp16 = const()[name = string("op_846_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279205440)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_846_to_fp16, x = var_831_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282482304)))]; + tensor var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285759168)))]; + tensor linear_26_cast_fp16 = linear(bias = var_851_to_fp16, weight = var_850_to_fp16, x = var_831_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_853_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_853_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_384")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_383")]; + int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1280)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; + tensor var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor([0, 0, 0])]; + tensor var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor([true, false, true])]; + tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_76, end_mask = var_869_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor var_872_begin_0 = const()[name = string("op_872_begin_0"), val = tensor([0, 0, 0])]; + tensor var_872_end_mask_0 = const()[name = string("op_872_end_mask_0"), val = tensor([true, false, true])]; + tensor var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = concat_76, end_mask = var_872_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_872_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 20, 64])]; + tensor var_882_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_882_cast_fp16")]; + tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_882_cast_fp16, y = const_172_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 20, 64])]; + tensor var_889_cast_fp16 = reshape(shape = concat_79x, x = var_869_cast_fp16)[name = string("op_889_cast_fp16")]; + tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_889_cast_fp16, y = const_173_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 20, 64])]; + tensor var_896_cast_fp16 = reshape(shape = concat_80x, x = var_872_cast_fp16)[name = string("op_896_cast_fp16")]; + tensor var_897 = const()[name = string("op_897"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_270 = transpose(perm = transpose_270_perm_0, x = k_35_cast_fp16)[name = string("transpose_614")]; + tensor transpose_269 = transpose(perm = transpose_269_perm_0, x = q_27_cast_fp16)[name = string("transpose_615")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_900_begin_0 = const()[name = string("op_900_begin_0"), val = tensor([0, 0])]; + tensor var_900_end_mask_0 = const()[name = string("op_900_end_mask_0"), val = tensor([false, true])]; + tensor var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = concat_81, end_mask = var_900_end_mask_0, x = mask_to_fp16)[name = string("op_900_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_901_begin_0 = const()[name = string("op_901_begin_0"), val = tensor([0, 0])]; + tensor var_901_end_mask_0 = const()[name = string("op_901_end_mask_0"), val = tensor([true, false])]; + tensor var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = concat_82, end_mask = var_901_end_mask_0, x = var_900_cast_fp16)[name = string("op_901_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_901_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_904_cast_fp16 = softmax(axis = var_813, x = qk_21_cast_fp16)[name = string("op_904_cast_fp16")]; + bool var_906_transpose_x_0 = const()[name = string("op_906_transpose_x_0"), val = bool(false)]; + bool var_906_transpose_y_0 = const()[name = string("op_906_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_897, x = var_896_cast_fp16)[name = string("transpose_616")]; + tensor var_906_cast_fp16 = matmul(transpose_x = var_906_transpose_x_0, transpose_y = var_906_transpose_y_0, x = var_904_cast_fp16, y = v_35_cast_fp16)[name = string("op_906_cast_fp16")]; + tensor var_907 = const()[name = string("op_907"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 1280])]; + tensor var_908_cast_fp16 = transpose(perm = var_907, x = var_906_cast_fp16)[name = string("transpose_613")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_908_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_912_to_fp16 = const()[name = string("op_912_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285761792)))]; + tensor var_913_to_fp16 = const()[name = string("op_913_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289038656)))]; + tensor linear_27_cast_fp16 = linear(bias = var_913_to_fp16, weight = var_912_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_920_axes_0 = const()[name = string("op_920_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289041280)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289043904)))]; + tensor var_920_cast_fp16 = layer_norm(axes = var_920_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_920_cast_fp16")]; + tensor var_929_to_fp16 = const()[name = string("op_929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289046528)))]; + tensor var_930_to_fp16 = const()[name = string("op_930_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292323392)))]; + tensor linear_28_cast_fp16 = linear(bias = var_930_to_fp16, weight = var_929_to_fp16, x = var_920_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 20, 64])]; + tensor var_950_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_950_cast_fp16")]; + tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_950_cast_fp16, y = const_174_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_956 = const()[name = string("op_956"), val = tensor([1, 1500, 20, -1])]; + tensor var_957_cast_fp16 = reshape(shape = var_956, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_957_cast_fp16")]; + tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_957_cast_fp16, y = const_175_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_963 = const()[name = string("op_963"), val = tensor([1, 1500, 20, -1])]; + tensor var_964_cast_fp16 = reshape(shape = var_963, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_964_cast_fp16")]; + tensor var_965 = const()[name = string("op_965"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_272 = transpose(perm = transpose_272_perm_0, x = k_39_cast_fp16)[name = string("transpose_610")]; + tensor transpose_271 = transpose(perm = transpose_271_perm_0, x = q_31_cast_fp16)[name = string("transpose_611")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_23_cast_fp16")]; + tensor var_969_cast_fp16 = softmax(axis = var_813, x = qk_23_cast_fp16)[name = string("op_969_cast_fp16")]; + bool var_971_transpose_x_0 = const()[name = string("op_971_transpose_x_0"), val = bool(false)]; + bool var_971_transpose_y_0 = const()[name = string("op_971_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_965, x = var_964_cast_fp16)[name = string("transpose_612")]; + tensor var_971_cast_fp16 = matmul(transpose_x = var_971_transpose_x_0, transpose_y = var_971_transpose_y_0, x = var_969_cast_fp16, y = v_39_cast_fp16)[name = string("op_971_cast_fp16")]; + tensor var_972 = const()[name = string("op_972"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 1280])]; + tensor var_973_cast_fp16 = transpose(perm = var_972, x = var_971_cast_fp16)[name = string("transpose_609")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_973_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292326016)))]; + tensor var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295602880)))]; + tensor linear_29_cast_fp16 = linear(bias = var_978_to_fp16, weight = var_977_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295605504)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295608128)))]; + tensor var_985_cast_fp16 = layer_norm(axes = var_985_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_819_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_985_cast_fp16")]; + tensor var_994_to_fp16 = const()[name = string("op_994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295610752)))]; + tensor var_995_to_fp16 = const()[name = string("op_995_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308718016)))]; + tensor linear_30_cast_fp16 = linear(bias = var_995_to_fp16, weight = var_994_to_fp16, x = var_985_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308728320)))]; + tensor var_1001_to_fp16 = const()[name = string("op_1001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321835584)))]; + tensor linear_31_cast_fp16 = linear(bias = var_1001_to_fp16, weight = var_1000_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; + tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_17_cast_fp16")]; + tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 1280])]; + tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_17_cast_fp16")]; + tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; + tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; + tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 1280])]; + tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; + int32 var_1024 = const()[name = string("op_1024"), val = int32(-1)]; + tensor var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321838208)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321840832)))]; + fp16 var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1042_cast_fp16")]; + tensor var_1053_to_fp16 = const()[name = string("op_1053_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321843456)))]; + tensor var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325120320)))]; + tensor linear_32_cast_fp16 = linear(bias = var_1054_to_fp16, weight = var_1053_to_fp16, x = var_1042_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1057_to_fp16 = const()[name = string("op_1057_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325122944)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1057_to_fp16, x = var_1042_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328399808)))]; + tensor var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331676672)))]; + tensor linear_34_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1042_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor var_1064_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1064_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_382")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_381")]; + int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; + tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; + tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")]; + tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")]; + int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; + int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1280)]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; + tensor var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_98, end_mask = var_1080_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1080_cast_fp16")]; + tensor var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = concat_98, end_mask = var_1083_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1083_cast_fp16")]; + tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 20, 64])]; + tensor var_1093_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1093_cast_fp16")]; + tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1093_cast_fp16, y = const_176_to_fp16)[name = string("q_35_cast_fp16")]; + tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 20, 64])]; + tensor var_1100_cast_fp16 = reshape(shape = concat_101x, x = var_1080_cast_fp16)[name = string("op_1100_cast_fp16")]; + tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_45_cast_fp16 = mul(x = var_1100_cast_fp16, y = const_177_to_fp16)[name = string("k_45_cast_fp16")]; + tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 20, 64])]; + tensor var_1107_cast_fp16 = reshape(shape = concat_102x, x = var_1083_cast_fp16)[name = string("op_1107_cast_fp16")]; + tensor var_1108 = const()[name = string("op_1108"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_274 = transpose(perm = transpose_274_perm_0, x = k_45_cast_fp16)[name = string("transpose_606")]; + tensor transpose_273 = transpose(perm = transpose_273_perm_0, x = q_35_cast_fp16)[name = string("transpose_607")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_25_cast_fp16")]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; + tensor var_1111_begin_0 = const()[name = string("op_1111_begin_0"), val = tensor([0, 0])]; + tensor var_1111_end_mask_0 = const()[name = string("op_1111_end_mask_0"), val = tensor([false, true])]; + tensor var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = concat_103, end_mask = var_1111_end_mask_0, x = mask_to_fp16)[name = string("op_1111_cast_fp16")]; + int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; + tensor var_1112_begin_0 = const()[name = string("op_1112_begin_0"), val = tensor([0, 0])]; + tensor var_1112_end_mask_0 = const()[name = string("op_1112_end_mask_0"), val = tensor([true, false])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = concat_104, end_mask = var_1112_end_mask_0, x = var_1111_cast_fp16)[name = string("op_1112_cast_fp16")]; + tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1112_cast_fp16)[name = string("qk_27_cast_fp16")]; + tensor var_1115_cast_fp16 = softmax(axis = var_1024, x = qk_27_cast_fp16)[name = string("op_1115_cast_fp16")]; + bool var_1117_transpose_x_0 = const()[name = string("op_1117_transpose_x_0"), val = bool(false)]; + bool var_1117_transpose_y_0 = const()[name = string("op_1117_transpose_y_0"), val = bool(false)]; + tensor v_45_cast_fp16 = transpose(perm = var_1108, x = var_1107_cast_fp16)[name = string("transpose_608")]; + tensor var_1117_cast_fp16 = matmul(transpose_x = var_1117_transpose_x_0, transpose_y = var_1117_transpose_y_0, x = var_1115_cast_fp16, y = v_45_cast_fp16)[name = string("op_1117_cast_fp16")]; + tensor var_1118 = const()[name = string("op_1118"), val = tensor([0, 2, 1, 3])]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 1280])]; + tensor var_1119_cast_fp16 = transpose(perm = var_1118, x = var_1117_cast_fp16)[name = string("transpose_605")]; + tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1119_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor var_1123_to_fp16 = const()[name = string("op_1123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(331679296)))]; + tensor var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334956160)))]; + tensor linear_35_cast_fp16 = linear(bias = var_1124_to_fp16, weight = var_1123_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; + tensor var_1131_axes_0 = const()[name = string("op_1131_axes_0"), val = tensor([-1])]; + tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334958784)))]; + tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334961408)))]; + tensor var_1131_cast_fp16 = layer_norm(axes = var_1131_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1131_cast_fp16")]; + tensor var_1140_to_fp16 = const()[name = string("op_1140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334964032)))]; + tensor var_1141_to_fp16 = const()[name = string("op_1141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338240896)))]; + tensor linear_36_cast_fp16 = linear(bias = var_1141_to_fp16, weight = var_1140_to_fp16, x = var_1131_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; + tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; + tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; + tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 20, 64])]; + tensor var_1161_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1161_cast_fp16")]; + tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1161_cast_fp16, y = const_178_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1167 = const()[name = string("op_1167"), val = tensor([1, 1500, 20, -1])]; + tensor var_1168_cast_fp16 = reshape(shape = var_1167, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1168_cast_fp16")]; + tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_49_cast_fp16 = mul(x = var_1168_cast_fp16, y = const_179_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_1174 = const()[name = string("op_1174"), val = tensor([1, 1500, 20, -1])]; + tensor var_1175_cast_fp16 = reshape(shape = var_1174, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1175_cast_fp16")]; + tensor var_1176 = const()[name = string("op_1176"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_276 = transpose(perm = transpose_276_perm_0, x = k_49_cast_fp16)[name = string("transpose_602")]; + tensor transpose_275 = transpose(perm = transpose_275_perm_0, x = q_39_cast_fp16)[name = string("transpose_603")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_29_cast_fp16")]; + tensor var_1180_cast_fp16 = softmax(axis = var_1024, x = qk_29_cast_fp16)[name = string("op_1180_cast_fp16")]; + bool var_1182_transpose_x_0 = const()[name = string("op_1182_transpose_x_0"), val = bool(false)]; + bool var_1182_transpose_y_0 = const()[name = string("op_1182_transpose_y_0"), val = bool(false)]; + tensor v_49_cast_fp16 = transpose(perm = var_1176, x = var_1175_cast_fp16)[name = string("transpose_604")]; + tensor var_1182_cast_fp16 = matmul(transpose_x = var_1182_transpose_x_0, transpose_y = var_1182_transpose_y_0, x = var_1180_cast_fp16, y = v_49_cast_fp16)[name = string("op_1182_cast_fp16")]; + tensor var_1183 = const()[name = string("op_1183"), val = tensor([0, 2, 1, 3])]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 1280])]; + tensor var_1184_cast_fp16 = transpose(perm = var_1183, x = var_1182_cast_fp16)[name = string("transpose_601")]; + tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1184_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338243520)))]; + tensor var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341520384)))]; + tensor linear_37_cast_fp16 = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; + tensor var_1196_axes_0 = const()[name = string("op_1196_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341523008)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341525632)))]; + tensor var_1196_cast_fp16 = layer_norm(axes = var_1196_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_1030_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1196_cast_fp16")]; + tensor var_1205_to_fp16 = const()[name = string("op_1205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341528256)))]; + tensor var_1206_to_fp16 = const()[name = string("op_1206_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354635520)))]; + tensor linear_38_cast_fp16 = linear(bias = var_1206_to_fp16, weight = var_1205_to_fp16, x = var_1196_cast_fp16)[name = string("linear_38_cast_fp16")]; + string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; + tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354645824)))]; + tensor var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367753088)))]; + tensor linear_39_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; + tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; + tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_21_cast_fp16")]; + tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 1280])]; + tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_21_cast_fp16")]; + tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; + tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; + tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 1280])]; + tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; + int32 var_1235 = const()[name = string("op_1235"), val = int32(-1)]; + tensor var_1253_axes_0 = const()[name = string("op_1253_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367755712)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367758336)))]; + fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1253_cast_fp16 = layer_norm(axes = var_1253_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1253_cast_fp16")]; + tensor var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367760960)))]; + tensor var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371037824)))]; + tensor linear_40_cast_fp16 = linear(bias = var_1265_to_fp16, weight = var_1264_to_fp16, x = var_1253_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor var_1268_to_fp16 = const()[name = string("op_1268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371040448)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1268_to_fp16, x = var_1253_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374317312)))]; + tensor var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377594176)))]; + tensor linear_42_cast_fp16 = linear(bias = var_1273_to_fp16, weight = var_1272_to_fp16, x = var_1253_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_1275_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1275_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1275_shape_cast_fp16_to_uint16 = cast(dtype = var_1275_shape_cast_fp16_to_uint16_dtype_0, x = var_1275_shape_cast_fp16)[name = string("cast_380")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1275_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_379")]; + int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; + tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; + tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; + tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")]; + tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")]; + int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; + int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1280)]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; + tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_120, end_mask = var_1291_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor var_1294_begin_0 = const()[name = string("op_1294_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1294_end_mask_0 = const()[name = string("op_1294_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1294_cast_fp16 = slice_by_index(begin = var_1294_begin_0, end = concat_120, end_mask = var_1294_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1294_cast_fp16")]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 20, 64])]; + tensor var_1304_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1304_cast_fp16")]; + tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1304_cast_fp16, y = const_180_to_fp16)[name = string("q_43_cast_fp16")]; + tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 20, 64])]; + tensor var_1311_cast_fp16 = reshape(shape = concat_123x, x = var_1291_cast_fp16)[name = string("op_1311_cast_fp16")]; + tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1311_cast_fp16, y = const_181_to_fp16)[name = string("k_55_cast_fp16")]; + tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 20, 64])]; + tensor var_1318_cast_fp16 = reshape(shape = concat_124x, x = var_1294_cast_fp16)[name = string("op_1318_cast_fp16")]; + tensor var_1319 = const()[name = string("op_1319"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_278 = transpose(perm = transpose_278_perm_0, x = k_55_cast_fp16)[name = string("transpose_598")]; + tensor transpose_277 = transpose(perm = transpose_277_perm_0, x = q_43_cast_fp16)[name = string("transpose_599")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_31_cast_fp16")]; + int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; + tensor var_1322_begin_0 = const()[name = string("op_1322_begin_0"), val = tensor([0, 0])]; + tensor var_1322_end_mask_0 = const()[name = string("op_1322_end_mask_0"), val = tensor([false, true])]; + tensor var_1322_cast_fp16 = slice_by_index(begin = var_1322_begin_0, end = concat_125, end_mask = var_1322_end_mask_0, x = mask_to_fp16)[name = string("op_1322_cast_fp16")]; + int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; + tensor var_1323_begin_0 = const()[name = string("op_1323_begin_0"), val = tensor([0, 0])]; + tensor var_1323_end_mask_0 = const()[name = string("op_1323_end_mask_0"), val = tensor([true, false])]; + tensor var_1323_cast_fp16 = slice_by_index(begin = var_1323_begin_0, end = concat_126, end_mask = var_1323_end_mask_0, x = var_1322_cast_fp16)[name = string("op_1323_cast_fp16")]; + tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1323_cast_fp16)[name = string("qk_33_cast_fp16")]; + tensor var_1326_cast_fp16 = softmax(axis = var_1235, x = qk_33_cast_fp16)[name = string("op_1326_cast_fp16")]; + bool var_1328_transpose_x_0 = const()[name = string("op_1328_transpose_x_0"), val = bool(false)]; + bool var_1328_transpose_y_0 = const()[name = string("op_1328_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1319, x = var_1318_cast_fp16)[name = string("transpose_600")]; + tensor var_1328_cast_fp16 = matmul(transpose_x = var_1328_transpose_x_0, transpose_y = var_1328_transpose_y_0, x = var_1326_cast_fp16, y = v_55_cast_fp16)[name = string("op_1328_cast_fp16")]; + tensor var_1329 = const()[name = string("op_1329"), val = tensor([0, 2, 1, 3])]; + tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 1280])]; + tensor var_1330_cast_fp16 = transpose(perm = var_1329, x = var_1328_cast_fp16)[name = string("transpose_597")]; + tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1330_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1334_to_fp16 = const()[name = string("op_1334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377596800)))]; + tensor var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873664)))]; + tensor linear_43_cast_fp16 = linear(bias = var_1335_to_fp16, weight = var_1334_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_1342_axes_0 = const()[name = string("op_1342_axes_0"), val = tensor([-1])]; + tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380876288)))]; + tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380878912)))]; + tensor var_1342_cast_fp16 = layer_norm(axes = var_1342_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1342_cast_fp16")]; + tensor var_1351_to_fp16 = const()[name = string("op_1351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380881536)))]; + tensor var_1352_to_fp16 = const()[name = string("op_1352_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384158400)))]; + tensor linear_44_cast_fp16 = linear(bias = var_1352_to_fp16, weight = var_1351_to_fp16, x = var_1342_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; + tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; + tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 20, 64])]; + tensor var_1372_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1372_cast_fp16")]; + tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1372_cast_fp16, y = const_182_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1378 = const()[name = string("op_1378"), val = tensor([1, 1500, 20, -1])]; + tensor var_1379_cast_fp16 = reshape(shape = var_1378, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1379_cast_fp16")]; + tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1379_cast_fp16, y = const_183_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1385 = const()[name = string("op_1385"), val = tensor([1, 1500, 20, -1])]; + tensor var_1386_cast_fp16 = reshape(shape = var_1385, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1386_cast_fp16")]; + tensor var_1387 = const()[name = string("op_1387"), val = tensor([0, 2, 1, 3])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_280 = transpose(perm = transpose_280_perm_0, x = k_59_cast_fp16)[name = string("transpose_594")]; + tensor transpose_279 = transpose(perm = transpose_279_perm_0, x = q_47_cast_fp16)[name = string("transpose_595")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_35_cast_fp16")]; + tensor var_1391_cast_fp16 = softmax(axis = var_1235, x = qk_35_cast_fp16)[name = string("op_1391_cast_fp16")]; + bool var_1393_transpose_x_0 = const()[name = string("op_1393_transpose_x_0"), val = bool(false)]; + bool var_1393_transpose_y_0 = const()[name = string("op_1393_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1387, x = var_1386_cast_fp16)[name = string("transpose_596")]; + tensor var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_0, transpose_y = var_1393_transpose_y_0, x = var_1391_cast_fp16, y = v_59_cast_fp16)[name = string("op_1393_cast_fp16")]; + tensor var_1394 = const()[name = string("op_1394"), val = tensor([0, 2, 1, 3])]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 1280])]; + tensor var_1395_cast_fp16 = transpose(perm = var_1394, x = var_1393_cast_fp16)[name = string("transpose_593")]; + tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1395_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384161024)))]; + tensor var_1400_to_fp16 = const()[name = string("op_1400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387437888)))]; + tensor linear_45_cast_fp16 = linear(bias = var_1400_to_fp16, weight = var_1399_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_1407_axes_0 = const()[name = string("op_1407_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387440512)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387443136)))]; + tensor var_1407_cast_fp16 = layer_norm(axes = var_1407_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1241_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1407_cast_fp16")]; + tensor var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387445760)))]; + tensor var_1417_to_fp16 = const()[name = string("op_1417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400553024)))]; + tensor linear_46_cast_fp16 = linear(bias = var_1417_to_fp16, weight = var_1416_to_fp16, x = var_1407_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; + tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400563328)))]; + tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413670592)))]; + tensor linear_47_cast_fp16 = linear(bias = var_1423_to_fp16, weight = var_1422_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; + tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; + tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_25_cast_fp16")]; + tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 1280])]; + tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_25_cast_fp16")]; + tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; + tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; + tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 1280])]; + tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; + int32 var_1446 = const()[name = string("op_1446"), val = int32(-1)]; + tensor var_1464_axes_0 = const()[name = string("op_1464_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413673216)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413675840)))]; + fp16 var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1464_cast_fp16 = layer_norm(axes = var_1464_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1464_cast_fp16")]; + tensor var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413678464)))]; + tensor var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416955328)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = var_1464_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1479_to_fp16 = const()[name = string("op_1479_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416957952)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1479_to_fp16, x = var_1464_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420234816)))]; + tensor var_1484_to_fp16 = const()[name = string("op_1484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423511680)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1484_to_fp16, weight = var_1483_to_fp16, x = var_1464_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1486_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1486_shape_cast_fp16")]; + int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; + int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; + bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; + string var_1486_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1486_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; + tensor var_1486_shape_cast_fp16_to_uint16 = cast(dtype = var_1486_shape_cast_fp16_to_uint16_dtype_0, x = var_1486_shape_cast_fp16)[name = string("cast_378")]; + uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1486_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; + string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_377")]; + int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; + tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; + int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; + bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; + tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; + tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; + tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; + tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; + int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; + bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; + tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; + tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")]; + tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")]; + int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; + int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1280)]; + int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; + bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; + tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; + tensor var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_142, end_mask = var_1502_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1502_cast_fp16")]; + tensor var_1505_begin_0 = const()[name = string("op_1505_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1505_end_mask_0 = const()[name = string("op_1505_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1505_cast_fp16 = slice_by_index(begin = var_1505_begin_0, end = concat_142, end_mask = var_1505_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1505_cast_fp16")]; + tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 20, 64])]; + tensor var_1515_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1515_cast_fp16")]; + tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1515_cast_fp16, y = const_184_to_fp16)[name = string("q_51_cast_fp16")]; + tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 20, 64])]; + tensor var_1522_cast_fp16 = reshape(shape = concat_145x, x = var_1502_cast_fp16)[name = string("op_1522_cast_fp16")]; + tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_65_cast_fp16 = mul(x = var_1522_cast_fp16, y = const_185_to_fp16)[name = string("k_65_cast_fp16")]; + tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 20, 64])]; + tensor var_1529_cast_fp16 = reshape(shape = concat_146x, x = var_1505_cast_fp16)[name = string("op_1529_cast_fp16")]; + tensor var_1530 = const()[name = string("op_1530"), val = tensor([0, 2, 1, 3])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_282 = transpose(perm = transpose_282_perm_0, x = k_65_cast_fp16)[name = string("transpose_590")]; + tensor transpose_281 = transpose(perm = transpose_281_perm_0, x = q_51_cast_fp16)[name = string("transpose_591")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_37_cast_fp16")]; + int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; + tensor var_1533_begin_0 = const()[name = string("op_1533_begin_0"), val = tensor([0, 0])]; + tensor var_1533_end_mask_0 = const()[name = string("op_1533_end_mask_0"), val = tensor([false, true])]; + tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = concat_147, end_mask = var_1533_end_mask_0, x = mask_to_fp16)[name = string("op_1533_cast_fp16")]; + int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; + tensor var_1534_begin_0 = const()[name = string("op_1534_begin_0"), val = tensor([0, 0])]; + tensor var_1534_end_mask_0 = const()[name = string("op_1534_end_mask_0"), val = tensor([true, false])]; + tensor var_1534_cast_fp16 = slice_by_index(begin = var_1534_begin_0, end = concat_148, end_mask = var_1534_end_mask_0, x = var_1533_cast_fp16)[name = string("op_1534_cast_fp16")]; + tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1534_cast_fp16)[name = string("qk_39_cast_fp16")]; + tensor var_1537_cast_fp16 = softmax(axis = var_1446, x = qk_39_cast_fp16)[name = string("op_1537_cast_fp16")]; + bool var_1539_transpose_x_0 = const()[name = string("op_1539_transpose_x_0"), val = bool(false)]; + bool var_1539_transpose_y_0 = const()[name = string("op_1539_transpose_y_0"), val = bool(false)]; + tensor v_65_cast_fp16 = transpose(perm = var_1530, x = var_1529_cast_fp16)[name = string("transpose_592")]; + tensor var_1539_cast_fp16 = matmul(transpose_x = var_1539_transpose_x_0, transpose_y = var_1539_transpose_y_0, x = var_1537_cast_fp16, y = v_65_cast_fp16)[name = string("op_1539_cast_fp16")]; + tensor var_1540 = const()[name = string("op_1540"), val = tensor([0, 2, 1, 3])]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 1280])]; + tensor var_1541_cast_fp16 = transpose(perm = var_1540, x = var_1539_cast_fp16)[name = string("transpose_589")]; + tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1541_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_1545_to_fp16 = const()[name = string("op_1545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423514304)))]; + tensor var_1546_to_fp16 = const()[name = string("op_1546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426791168)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1546_to_fp16, weight = var_1545_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_1553_axes_0 = const()[name = string("op_1553_axes_0"), val = tensor([-1])]; + tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426793792)))]; + tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426796416)))]; + tensor var_1553_cast_fp16 = layer_norm(axes = var_1553_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1553_cast_fp16")]; + tensor var_1562_to_fp16 = const()[name = string("op_1562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426799040)))]; + tensor var_1563_to_fp16 = const()[name = string("op_1563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430075904)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1563_to_fp16, weight = var_1562_to_fp16, x = var_1553_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; + tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; + tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; + tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; + tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 20, 64])]; + tensor var_1583_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1583_cast_fp16")]; + tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1583_cast_fp16, y = const_186_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1589 = const()[name = string("op_1589"), val = tensor([1, 1500, 20, -1])]; + tensor var_1590_cast_fp16 = reshape(shape = var_1589, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1590_cast_fp16")]; + tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_69_cast_fp16 = mul(x = var_1590_cast_fp16, y = const_187_to_fp16)[name = string("k_69_cast_fp16")]; + tensor var_1596 = const()[name = string("op_1596"), val = tensor([1, 1500, 20, -1])]; + tensor var_1597_cast_fp16 = reshape(shape = var_1596, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1597_cast_fp16")]; + tensor var_1598 = const()[name = string("op_1598"), val = tensor([0, 2, 1, 3])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_284 = transpose(perm = transpose_284_perm_0, x = k_69_cast_fp16)[name = string("transpose_586")]; + tensor transpose_283 = transpose(perm = transpose_283_perm_0, x = q_55_cast_fp16)[name = string("transpose_587")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_41_cast_fp16")]; + tensor var_1602_cast_fp16 = softmax(axis = var_1446, x = qk_41_cast_fp16)[name = string("op_1602_cast_fp16")]; + bool var_1604_transpose_x_0 = const()[name = string("op_1604_transpose_x_0"), val = bool(false)]; + bool var_1604_transpose_y_0 = const()[name = string("op_1604_transpose_y_0"), val = bool(false)]; + tensor v_69_cast_fp16 = transpose(perm = var_1598, x = var_1597_cast_fp16)[name = string("transpose_588")]; + tensor var_1604_cast_fp16 = matmul(transpose_x = var_1604_transpose_x_0, transpose_y = var_1604_transpose_y_0, x = var_1602_cast_fp16, y = v_69_cast_fp16)[name = string("op_1604_cast_fp16")]; + tensor var_1605 = const()[name = string("op_1605"), val = tensor([0, 2, 1, 3])]; + tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 1280])]; + tensor var_1606_cast_fp16 = transpose(perm = var_1605, x = var_1604_cast_fp16)[name = string("transpose_585")]; + tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1606_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430078528)))]; + tensor var_1611_to_fp16 = const()[name = string("op_1611_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433355392)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1611_to_fp16, weight = var_1610_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433358016)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433360640)))]; + tensor var_1618_cast_fp16 = layer_norm(axes = var_1618_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1452_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1618_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433363264)))]; + tensor var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446470528)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1618_cast_fp16)[name = string("linear_54_cast_fp16")]; + string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; + tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; + tensor var_1633_to_fp16 = const()[name = string("op_1633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446480832)))]; + tensor var_1634_to_fp16 = const()[name = string("op_1634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459588096)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1634_to_fp16, weight = var_1633_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; + tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_29_cast_fp16")]; + tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 1280])]; + tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_29_cast_fp16")]; + tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; + tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; + tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 1280])]; + tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; + int32 var_1657 = const()[name = string("op_1657"), val = int32(-1)]; + tensor var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459590720)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459593344)))]; + fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1675_cast_fp16 = layer_norm(axes = var_1675_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor var_1686_to_fp16 = const()[name = string("op_1686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459595968)))]; + tensor var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462872832)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1687_to_fp16, weight = var_1686_to_fp16, x = var_1675_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462875456)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1690_to_fp16, x = var_1675_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466152320)))]; + tensor var_1695_to_fp16 = const()[name = string("op_1695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469429184)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1695_to_fp16, weight = var_1694_to_fp16, x = var_1675_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor var_1697_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1697_shape_cast_fp16")]; + int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; + int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; + bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; + string var_1697_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1697_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; + tensor var_1697_shape_cast_fp16_to_uint16 = cast(dtype = var_1697_shape_cast_fp16_to_uint16_dtype_0, x = var_1697_shape_cast_fp16)[name = string("cast_376")]; + uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1697_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; + string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_375")]; + int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; + tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; + tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")]; + tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")]; + int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; + int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1280)]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; + tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_164, end_mask = var_1713_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1713_cast_fp16")]; + tensor var_1716_begin_0 = const()[name = string("op_1716_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1716_end_mask_0 = const()[name = string("op_1716_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1716_cast_fp16 = slice_by_index(begin = var_1716_begin_0, end = concat_164, end_mask = var_1716_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1716_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 20, 64])]; + tensor var_1726_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1726_cast_fp16")]; + tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1726_cast_fp16, y = const_188_to_fp16)[name = string("q_59_cast_fp16")]; + tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 20, 64])]; + tensor var_1733_cast_fp16 = reshape(shape = concat_167x, x = var_1713_cast_fp16)[name = string("op_1733_cast_fp16")]; + tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_1733_cast_fp16, y = const_189_to_fp16)[name = string("k_75_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 20, 64])]; + tensor var_1740_cast_fp16 = reshape(shape = concat_168x, x = var_1716_cast_fp16)[name = string("op_1740_cast_fp16")]; + tensor var_1741 = const()[name = string("op_1741"), val = tensor([0, 2, 1, 3])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_286 = transpose(perm = transpose_286_perm_0, x = k_75_cast_fp16)[name = string("transpose_582")]; + tensor transpose_285 = transpose(perm = transpose_285_perm_0, x = q_59_cast_fp16)[name = string("transpose_583")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_43_cast_fp16")]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; + tensor var_1744_begin_0 = const()[name = string("op_1744_begin_0"), val = tensor([0, 0])]; + tensor var_1744_end_mask_0 = const()[name = string("op_1744_end_mask_0"), val = tensor([false, true])]; + tensor var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = concat_169, end_mask = var_1744_end_mask_0, x = mask_to_fp16)[name = string("op_1744_cast_fp16")]; + int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; + tensor var_1745_begin_0 = const()[name = string("op_1745_begin_0"), val = tensor([0, 0])]; + tensor var_1745_end_mask_0 = const()[name = string("op_1745_end_mask_0"), val = tensor([true, false])]; + tensor var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = concat_170, end_mask = var_1745_end_mask_0, x = var_1744_cast_fp16)[name = string("op_1745_cast_fp16")]; + tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1745_cast_fp16)[name = string("qk_45_cast_fp16")]; + tensor var_1748_cast_fp16 = softmax(axis = var_1657, x = qk_45_cast_fp16)[name = string("op_1748_cast_fp16")]; + bool var_1750_transpose_x_0 = const()[name = string("op_1750_transpose_x_0"), val = bool(false)]; + bool var_1750_transpose_y_0 = const()[name = string("op_1750_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_1741, x = var_1740_cast_fp16)[name = string("transpose_584")]; + tensor var_1750_cast_fp16 = matmul(transpose_x = var_1750_transpose_x_0, transpose_y = var_1750_transpose_y_0, x = var_1748_cast_fp16, y = v_75_cast_fp16)[name = string("op_1750_cast_fp16")]; + tensor var_1751 = const()[name = string("op_1751"), val = tensor([0, 2, 1, 3])]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 1280])]; + tensor var_1752_cast_fp16 = transpose(perm = var_1751, x = var_1750_cast_fp16)[name = string("transpose_581")]; + tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1752_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1756_to_fp16 = const()[name = string("op_1756_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469431808)))]; + tensor var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472708672)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1757_to_fp16, weight = var_1756_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor var_1764_axes_0 = const()[name = string("op_1764_axes_0"), val = tensor([-1])]; + tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472711296)))]; + tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472713920)))]; + tensor var_1764_cast_fp16 = layer_norm(axes = var_1764_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor var_1773_to_fp16 = const()[name = string("op_1773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472716544)))]; + tensor var_1774_to_fp16 = const()[name = string("op_1774_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475993408)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1774_to_fp16, weight = var_1773_to_fp16, x = var_1764_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; + tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; + tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 20, 64])]; + tensor var_1794_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")]; + tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1794_cast_fp16, y = const_190_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 1500, 20, -1])]; + tensor var_1801_cast_fp16 = reshape(shape = var_1800, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1801_cast_fp16")]; + tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_1801_cast_fp16, y = const_191_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_1807 = const()[name = string("op_1807"), val = tensor([1, 1500, 20, -1])]; + tensor var_1808_cast_fp16 = reshape(shape = var_1807, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1808_cast_fp16")]; + tensor var_1809 = const()[name = string("op_1809"), val = tensor([0, 2, 1, 3])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_288 = transpose(perm = transpose_288_perm_0, x = k_79_cast_fp16)[name = string("transpose_578")]; + tensor transpose_287 = transpose(perm = transpose_287_perm_0, x = q_63_cast_fp16)[name = string("transpose_579")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_47_cast_fp16")]; + tensor var_1813_cast_fp16 = softmax(axis = var_1657, x = qk_47_cast_fp16)[name = string("op_1813_cast_fp16")]; + bool var_1815_transpose_x_0 = const()[name = string("op_1815_transpose_x_0"), val = bool(false)]; + bool var_1815_transpose_y_0 = const()[name = string("op_1815_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_1809, x = var_1808_cast_fp16)[name = string("transpose_580")]; + tensor var_1815_cast_fp16 = matmul(transpose_x = var_1815_transpose_x_0, transpose_y = var_1815_transpose_y_0, x = var_1813_cast_fp16, y = v_79_cast_fp16)[name = string("op_1815_cast_fp16")]; + tensor var_1816 = const()[name = string("op_1816"), val = tensor([0, 2, 1, 3])]; + tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 1280])]; + tensor var_1817_cast_fp16 = transpose(perm = var_1816, x = var_1815_cast_fp16)[name = string("transpose_577")]; + tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1817_cast_fp16)[name = string("x_139_cast_fp16")]; + tensor var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475996032)))]; + tensor var_1822_to_fp16 = const()[name = string("op_1822_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479272896)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1822_to_fp16, weight = var_1821_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; + tensor var_1829_axes_0 = const()[name = string("op_1829_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479275520)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479278144)))]; + tensor var_1829_cast_fp16 = layer_norm(axes = var_1829_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1663_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1829_cast_fp16")]; + tensor var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479280768)))]; + tensor var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492388032)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1839_to_fp16, weight = var_1838_to_fp16, x = var_1829_cast_fp16)[name = string("linear_62_cast_fp16")]; + string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; + tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1844_to_fp16 = const()[name = string("op_1844_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492398336)))]; + tensor var_1845_to_fp16 = const()[name = string("op_1845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505505600)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1845_to_fp16, weight = var_1844_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; + tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_33_cast_fp16")]; + tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 1280])]; + tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_33_cast_fp16")]; + tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; + tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; + tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 1280])]; + tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; + int32 var_1868 = const()[name = string("op_1868"), val = int32(-1)]; + tensor var_1886_axes_0 = const()[name = string("op_1886_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505508224)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505510848)))]; + fp16 var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1886_cast_fp16 = layer_norm(axes = var_1886_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505513472)))]; + tensor var_1898_to_fp16 = const()[name = string("op_1898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508790336)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1898_to_fp16, weight = var_1897_to_fp16, x = var_1886_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508792960)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1901_to_fp16, x = var_1886_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512069824)))]; + tensor var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515346688)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1906_to_fp16, weight = var_1905_to_fp16, x = var_1886_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1908_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1908_shape_cast_fp16")]; + int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; + int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; + bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; + string var_1908_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1908_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; + tensor var_1908_shape_cast_fp16_to_uint16 = cast(dtype = var_1908_shape_cast_fp16_to_uint16_dtype_0, x = var_1908_shape_cast_fp16)[name = string("cast_374")]; + uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1908_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; + string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_373")]; + int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; + tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; + tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; + tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; + tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; + int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; + bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; + tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; + tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")]; + tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")]; + int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; + int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1280)]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; + tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_186, end_mask = var_1924_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1924_cast_fp16")]; + tensor var_1927_begin_0 = const()[name = string("op_1927_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1927_end_mask_0 = const()[name = string("op_1927_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1927_cast_fp16 = slice_by_index(begin = var_1927_begin_0, end = concat_186, end_mask = var_1927_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1927_cast_fp16")]; + tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 20, 64])]; + tensor var_1937_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1937_cast_fp16")]; + tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1937_cast_fp16, y = const_192_to_fp16)[name = string("q_67_cast_fp16")]; + tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 20, 64])]; + tensor var_1944_cast_fp16 = reshape(shape = concat_189x, x = var_1924_cast_fp16)[name = string("op_1944_cast_fp16")]; + tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_85_cast_fp16 = mul(x = var_1944_cast_fp16, y = const_193_to_fp16)[name = string("k_85_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 20, 64])]; + tensor var_1951_cast_fp16 = reshape(shape = concat_190x, x = var_1927_cast_fp16)[name = string("op_1951_cast_fp16")]; + tensor var_1952 = const()[name = string("op_1952"), val = tensor([0, 2, 1, 3])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_289_perm_0 = const()[name = string("transpose_289_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_290_perm_0 = const()[name = string("transpose_290_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_290 = transpose(perm = transpose_290_perm_0, x = k_85_cast_fp16)[name = string("transpose_574")]; + tensor transpose_289 = transpose(perm = transpose_289_perm_0, x = q_67_cast_fp16)[name = string("transpose_575")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_289, y = transpose_290)[name = string("qk_49_cast_fp16")]; + int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; + int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; + bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; + tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; + tensor var_1955_begin_0 = const()[name = string("op_1955_begin_0"), val = tensor([0, 0])]; + tensor var_1955_end_mask_0 = const()[name = string("op_1955_end_mask_0"), val = tensor([false, true])]; + tensor var_1955_cast_fp16 = slice_by_index(begin = var_1955_begin_0, end = concat_191, end_mask = var_1955_end_mask_0, x = mask_to_fp16)[name = string("op_1955_cast_fp16")]; + int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; + tensor var_1956_begin_0 = const()[name = string("op_1956_begin_0"), val = tensor([0, 0])]; + tensor var_1956_end_mask_0 = const()[name = string("op_1956_end_mask_0"), val = tensor([true, false])]; + tensor var_1956_cast_fp16 = slice_by_index(begin = var_1956_begin_0, end = concat_192, end_mask = var_1956_end_mask_0, x = var_1955_cast_fp16)[name = string("op_1956_cast_fp16")]; + tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1956_cast_fp16)[name = string("qk_51_cast_fp16")]; + tensor var_1959_cast_fp16 = softmax(axis = var_1868, x = qk_51_cast_fp16)[name = string("op_1959_cast_fp16")]; + bool var_1961_transpose_x_0 = const()[name = string("op_1961_transpose_x_0"), val = bool(false)]; + bool var_1961_transpose_y_0 = const()[name = string("op_1961_transpose_y_0"), val = bool(false)]; + tensor v_85_cast_fp16 = transpose(perm = var_1952, x = var_1951_cast_fp16)[name = string("transpose_576")]; + tensor var_1961_cast_fp16 = matmul(transpose_x = var_1961_transpose_x_0, transpose_y = var_1961_transpose_y_0, x = var_1959_cast_fp16, y = v_85_cast_fp16)[name = string("op_1961_cast_fp16")]; + tensor var_1962 = const()[name = string("op_1962"), val = tensor([0, 2, 1, 3])]; + tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 1280])]; + tensor var_1963_cast_fp16 = transpose(perm = var_1962, x = var_1961_cast_fp16)[name = string("transpose_573")]; + tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1963_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515349312)))]; + tensor var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518626176)))]; + tensor linear_67_cast_fp16 = linear(bias = var_1968_to_fp16, weight = var_1967_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1975_axes_0 = const()[name = string("op_1975_axes_0"), val = tensor([-1])]; + tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518628800)))]; + tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518631424)))]; + tensor var_1975_cast_fp16 = layer_norm(axes = var_1975_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1975_cast_fp16")]; + tensor var_1984_to_fp16 = const()[name = string("op_1984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518634048)))]; + tensor var_1985_to_fp16 = const()[name = string("op_1985_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521910912)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1985_to_fp16, weight = var_1984_to_fp16, x = var_1975_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; + tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; + tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; + tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 20, 64])]; + tensor var_2005_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_2005_cast_fp16")]; + tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_2005_cast_fp16, y = const_194_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_2011 = const()[name = string("op_2011"), val = tensor([1, 1500, 20, -1])]; + tensor var_2012_cast_fp16 = reshape(shape = var_2011, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2012_cast_fp16")]; + tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_89_cast_fp16 = mul(x = var_2012_cast_fp16, y = const_195_to_fp16)[name = string("k_89_cast_fp16")]; + tensor var_2018 = const()[name = string("op_2018"), val = tensor([1, 1500, 20, -1])]; + tensor var_2019_cast_fp16 = reshape(shape = var_2018, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_2019_cast_fp16")]; + tensor var_2020 = const()[name = string("op_2020"), val = tensor([0, 2, 1, 3])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_291_perm_0 = const()[name = string("transpose_291_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_292_perm_0 = const()[name = string("transpose_292_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_292 = transpose(perm = transpose_292_perm_0, x = k_89_cast_fp16)[name = string("transpose_570")]; + tensor transpose_291 = transpose(perm = transpose_291_perm_0, x = q_71_cast_fp16)[name = string("transpose_571")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_291, y = transpose_292)[name = string("qk_53_cast_fp16")]; + tensor var_2024_cast_fp16 = softmax(axis = var_1868, x = qk_53_cast_fp16)[name = string("op_2024_cast_fp16")]; + bool var_2026_transpose_x_0 = const()[name = string("op_2026_transpose_x_0"), val = bool(false)]; + bool var_2026_transpose_y_0 = const()[name = string("op_2026_transpose_y_0"), val = bool(false)]; + tensor v_89_cast_fp16 = transpose(perm = var_2020, x = var_2019_cast_fp16)[name = string("transpose_572")]; + tensor var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_0, transpose_y = var_2026_transpose_y_0, x = var_2024_cast_fp16, y = v_89_cast_fp16)[name = string("op_2026_cast_fp16")]; + tensor var_2027 = const()[name = string("op_2027"), val = tensor([0, 2, 1, 3])]; + tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 1280])]; + tensor var_2028_cast_fp16 = transpose(perm = var_2027, x = var_2026_cast_fp16)[name = string("transpose_569")]; + tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_2028_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(521913536)))]; + tensor var_2033_to_fp16 = const()[name = string("op_2033_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525190400)))]; + tensor linear_69_cast_fp16 = linear(bias = var_2033_to_fp16, weight = var_2032_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_2040_axes_0 = const()[name = string("op_2040_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525193024)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525195648)))]; + tensor var_2040_cast_fp16 = layer_norm(axes = var_2040_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1874_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2040_cast_fp16")]; + tensor var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525198272)))]; + tensor var_2050_to_fp16 = const()[name = string("op_2050_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538305536)))]; + tensor linear_70_cast_fp16 = linear(bias = var_2050_to_fp16, weight = var_2049_to_fp16, x = var_2040_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; + tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_2055_to_fp16 = const()[name = string("op_2055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538315840)))]; + tensor var_2056_to_fp16 = const()[name = string("op_2056_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551423104)))]; + tensor linear_71_cast_fp16 = linear(bias = var_2056_to_fp16, weight = var_2055_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; + tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_37_cast_fp16")]; + tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 1280])]; + tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_37_cast_fp16")]; + tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; + tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; + tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 1280])]; + tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; + int32 var_2079 = const()[name = string("op_2079"), val = int32(-1)]; + tensor var_2097_axes_0 = const()[name = string("op_2097_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551425728)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551428352)))]; + fp16 var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2097_cast_fp16 = layer_norm(axes = var_2097_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2097_cast_fp16")]; + tensor var_2108_to_fp16 = const()[name = string("op_2108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551430976)))]; + tensor var_2109_to_fp16 = const()[name = string("op_2109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554707840)))]; + tensor linear_72_cast_fp16 = linear(bias = var_2109_to_fp16, weight = var_2108_to_fp16, x = var_2097_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554710464)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2112_to_fp16, x = var_2097_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557987328)))]; + tensor var_2117_to_fp16 = const()[name = string("op_2117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561264192)))]; + tensor linear_74_cast_fp16 = linear(bias = var_2117_to_fp16, weight = var_2116_to_fp16, x = var_2097_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2119_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2119_shape_cast_fp16")]; + int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; + int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; + bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; + string var_2119_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2119_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; + tensor var_2119_shape_cast_fp16_to_uint16 = cast(dtype = var_2119_shape_cast_fp16_to_uint16_dtype_0, x = var_2119_shape_cast_fp16)[name = string("cast_372")]; + uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2119_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; + string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_371")]; + int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; + tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; + tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; + tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")]; + tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")]; + int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; + int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1280)]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; + tensor var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_208, end_mask = var_2135_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2135_cast_fp16")]; + tensor var_2138_begin_0 = const()[name = string("op_2138_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2138_end_mask_0 = const()[name = string("op_2138_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = concat_208, end_mask = var_2138_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2138_cast_fp16")]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 20, 64])]; + tensor var_2148_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2148_cast_fp16")]; + tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2148_cast_fp16, y = const_196_to_fp16)[name = string("q_75_cast_fp16")]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 20, 64])]; + tensor var_2155_cast_fp16 = reshape(shape = concat_211x, x = var_2135_cast_fp16)[name = string("op_2155_cast_fp16")]; + tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2155_cast_fp16, y = const_197_to_fp16)[name = string("k_95_cast_fp16")]; + tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 20, 64])]; + tensor var_2162_cast_fp16 = reshape(shape = concat_212x, x = var_2138_cast_fp16)[name = string("op_2162_cast_fp16")]; + tensor var_2163 = const()[name = string("op_2163"), val = tensor([0, 2, 1, 3])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_293_perm_0 = const()[name = string("transpose_293_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_294_perm_0 = const()[name = string("transpose_294_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_294 = transpose(perm = transpose_294_perm_0, x = k_95_cast_fp16)[name = string("transpose_566")]; + tensor transpose_293 = transpose(perm = transpose_293_perm_0, x = q_75_cast_fp16)[name = string("transpose_567")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_293, y = transpose_294)[name = string("qk_55_cast_fp16")]; + int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; + int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; + bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; + tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; + tensor var_2166_begin_0 = const()[name = string("op_2166_begin_0"), val = tensor([0, 0])]; + tensor var_2166_end_mask_0 = const()[name = string("op_2166_end_mask_0"), val = tensor([false, true])]; + tensor var_2166_cast_fp16 = slice_by_index(begin = var_2166_begin_0, end = concat_213, end_mask = var_2166_end_mask_0, x = mask_to_fp16)[name = string("op_2166_cast_fp16")]; + int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; + tensor var_2167_begin_0 = const()[name = string("op_2167_begin_0"), val = tensor([0, 0])]; + tensor var_2167_end_mask_0 = const()[name = string("op_2167_end_mask_0"), val = tensor([true, false])]; + tensor var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = concat_214, end_mask = var_2167_end_mask_0, x = var_2166_cast_fp16)[name = string("op_2167_cast_fp16")]; + tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2167_cast_fp16)[name = string("qk_57_cast_fp16")]; + tensor var_2170_cast_fp16 = softmax(axis = var_2079, x = qk_57_cast_fp16)[name = string("op_2170_cast_fp16")]; + bool var_2172_transpose_x_0 = const()[name = string("op_2172_transpose_x_0"), val = bool(false)]; + bool var_2172_transpose_y_0 = const()[name = string("op_2172_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2163, x = var_2162_cast_fp16)[name = string("transpose_568")]; + tensor var_2172_cast_fp16 = matmul(transpose_x = var_2172_transpose_x_0, transpose_y = var_2172_transpose_y_0, x = var_2170_cast_fp16, y = v_95_cast_fp16)[name = string("op_2172_cast_fp16")]; + tensor var_2173 = const()[name = string("op_2173"), val = tensor([0, 2, 1, 3])]; + tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 1280])]; + tensor var_2174_cast_fp16 = transpose(perm = var_2173, x = var_2172_cast_fp16)[name = string("transpose_565")]; + tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2174_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561266816)))]; + tensor var_2179_to_fp16 = const()[name = string("op_2179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564543680)))]; + tensor linear_75_cast_fp16 = linear(bias = var_2179_to_fp16, weight = var_2178_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; + tensor var_2186_axes_0 = const()[name = string("op_2186_axes_0"), val = tensor([-1])]; + tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564546304)))]; + tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564548928)))]; + tensor var_2186_cast_fp16 = layer_norm(axes = var_2186_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2186_cast_fp16")]; + tensor var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564551552)))]; + tensor var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567828416)))]; + tensor linear_76_cast_fp16 = linear(bias = var_2196_to_fp16, weight = var_2195_to_fp16, x = var_2186_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; + tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; + tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; + tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; + tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 20, 64])]; + tensor var_2216_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2216_cast_fp16")]; + tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2216_cast_fp16, y = const_198_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2222 = const()[name = string("op_2222"), val = tensor([1, 1500, 20, -1])]; + tensor var_2223_cast_fp16 = reshape(shape = var_2222, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2223_cast_fp16")]; + tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2223_cast_fp16, y = const_199_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2229 = const()[name = string("op_2229"), val = tensor([1, 1500, 20, -1])]; + tensor var_2230_cast_fp16 = reshape(shape = var_2229, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2230_cast_fp16")]; + tensor var_2231 = const()[name = string("op_2231"), val = tensor([0, 2, 1, 3])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_295_perm_0 = const()[name = string("transpose_295_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_296_perm_0 = const()[name = string("transpose_296_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_296 = transpose(perm = transpose_296_perm_0, x = k_99_cast_fp16)[name = string("transpose_562")]; + tensor transpose_295 = transpose(perm = transpose_295_perm_0, x = q_79_cast_fp16)[name = string("transpose_563")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_295, y = transpose_296)[name = string("qk_59_cast_fp16")]; + tensor var_2235_cast_fp16 = softmax(axis = var_2079, x = qk_59_cast_fp16)[name = string("op_2235_cast_fp16")]; + bool var_2237_transpose_x_0 = const()[name = string("op_2237_transpose_x_0"), val = bool(false)]; + bool var_2237_transpose_y_0 = const()[name = string("op_2237_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2231, x = var_2230_cast_fp16)[name = string("transpose_564")]; + tensor var_2237_cast_fp16 = matmul(transpose_x = var_2237_transpose_x_0, transpose_y = var_2237_transpose_y_0, x = var_2235_cast_fp16, y = v_99_cast_fp16)[name = string("op_2237_cast_fp16")]; + tensor var_2238 = const()[name = string("op_2238"), val = tensor([0, 2, 1, 3])]; + tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 1280])]; + tensor var_2239_cast_fp16 = transpose(perm = var_2238, x = var_2237_cast_fp16)[name = string("transpose_561")]; + tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2239_cast_fp16)[name = string("x_175_cast_fp16")]; + tensor var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567831040)))]; + tensor var_2244_to_fp16 = const()[name = string("op_2244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571107904)))]; + tensor linear_77_cast_fp16 = linear(bias = var_2244_to_fp16, weight = var_2243_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; + tensor var_2251_axes_0 = const()[name = string("op_2251_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571110528)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571113152)))]; + tensor var_2251_cast_fp16 = layer_norm(axes = var_2251_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2085_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2251_cast_fp16")]; + tensor var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571115776)))]; + tensor var_2261_to_fp16 = const()[name = string("op_2261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584223040)))]; + tensor linear_78_cast_fp16 = linear(bias = var_2261_to_fp16, weight = var_2260_to_fp16, x = var_2251_cast_fp16)[name = string("linear_78_cast_fp16")]; + string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; + tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_2266_to_fp16 = const()[name = string("op_2266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(584233344)))]; + tensor var_2267_to_fp16 = const()[name = string("op_2267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597340608)))]; + tensor linear_79_cast_fp16 = linear(bias = var_2267_to_fp16, weight = var_2266_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; + tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; + tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_41_cast_fp16")]; + tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 1280])]; + tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_41_cast_fp16")]; + tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; + tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; + tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 1280])]; + tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; + int32 var_2290 = const()[name = string("op_2290"), val = int32(-1)]; + tensor var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597343232)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597345856)))]; + fp16 var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2308_cast_fp16")]; + tensor var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597348480)))]; + tensor var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600625344)))]; + tensor linear_80_cast_fp16 = linear(bias = var_2320_to_fp16, weight = var_2319_to_fp16, x = var_2308_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600627968)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2323_to_fp16, x = var_2308_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(603904832)))]; + tensor var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607181696)))]; + tensor linear_82_cast_fp16 = linear(bias = var_2328_to_fp16, weight = var_2327_to_fp16, x = var_2308_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor var_2330_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2330_shape_cast_fp16")]; + int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; + int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; + bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; + string var_2330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; + tensor var_2330_shape_cast_fp16_to_uint16 = cast(dtype = var_2330_shape_cast_fp16_to_uint16_dtype_0, x = var_2330_shape_cast_fp16)[name = string("cast_370")]; + uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2330_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; + string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_369")]; + int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; + tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; + tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; + tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; + tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; + int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; + bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; + tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; + tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")]; + tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")]; + int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; + int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1280)]; + int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; + bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; + tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; + tensor var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_230, end_mask = var_2346_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2346_cast_fp16")]; + tensor var_2349_begin_0 = const()[name = string("op_2349_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2349_end_mask_0 = const()[name = string("op_2349_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = concat_230, end_mask = var_2349_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2349_cast_fp16")]; + tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 20, 64])]; + tensor var_2359_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2359_cast_fp16")]; + tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2359_cast_fp16, y = const_200_to_fp16)[name = string("q_83_cast_fp16")]; + tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 20, 64])]; + tensor var_2366_cast_fp16 = reshape(shape = concat_233x, x = var_2346_cast_fp16)[name = string("op_2366_cast_fp16")]; + tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_105_cast_fp16 = mul(x = var_2366_cast_fp16, y = const_201_to_fp16)[name = string("k_105_cast_fp16")]; + tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 20, 64])]; + tensor var_2373_cast_fp16 = reshape(shape = concat_234x, x = var_2349_cast_fp16)[name = string("op_2373_cast_fp16")]; + tensor var_2374 = const()[name = string("op_2374"), val = tensor([0, 2, 1, 3])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_297_perm_0 = const()[name = string("transpose_297_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_298_perm_0 = const()[name = string("transpose_298_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_298 = transpose(perm = transpose_298_perm_0, x = k_105_cast_fp16)[name = string("transpose_558")]; + tensor transpose_297 = transpose(perm = transpose_297_perm_0, x = q_83_cast_fp16)[name = string("transpose_559")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_297, y = transpose_298)[name = string("qk_61_cast_fp16")]; + int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; + int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; + bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; + tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; + tensor var_2377_begin_0 = const()[name = string("op_2377_begin_0"), val = tensor([0, 0])]; + tensor var_2377_end_mask_0 = const()[name = string("op_2377_end_mask_0"), val = tensor([false, true])]; + tensor var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = concat_235, end_mask = var_2377_end_mask_0, x = mask_to_fp16)[name = string("op_2377_cast_fp16")]; + int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; + tensor var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor([0, 0])]; + tensor var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor([true, false])]; + tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = concat_236, end_mask = var_2378_end_mask_0, x = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")]; + tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2378_cast_fp16)[name = string("qk_63_cast_fp16")]; + tensor var_2381_cast_fp16 = softmax(axis = var_2290, x = qk_63_cast_fp16)[name = string("op_2381_cast_fp16")]; + bool var_2383_transpose_x_0 = const()[name = string("op_2383_transpose_x_0"), val = bool(false)]; + bool var_2383_transpose_y_0 = const()[name = string("op_2383_transpose_y_0"), val = bool(false)]; + tensor v_105_cast_fp16 = transpose(perm = var_2374, x = var_2373_cast_fp16)[name = string("transpose_560")]; + tensor var_2383_cast_fp16 = matmul(transpose_x = var_2383_transpose_x_0, transpose_y = var_2383_transpose_y_0, x = var_2381_cast_fp16, y = v_105_cast_fp16)[name = string("op_2383_cast_fp16")]; + tensor var_2384 = const()[name = string("op_2384"), val = tensor([0, 2, 1, 3])]; + tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 1280])]; + tensor var_2385_cast_fp16 = transpose(perm = var_2384, x = var_2383_cast_fp16)[name = string("transpose_557")]; + tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2385_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_2389_to_fp16 = const()[name = string("op_2389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607184320)))]; + tensor var_2390_to_fp16 = const()[name = string("op_2390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610461184)))]; + tensor linear_83_cast_fp16 = linear(bias = var_2390_to_fp16, weight = var_2389_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; + tensor var_2397_axes_0 = const()[name = string("op_2397_axes_0"), val = tensor([-1])]; + tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610463808)))]; + tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610466432)))]; + tensor var_2397_cast_fp16 = layer_norm(axes = var_2397_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2397_cast_fp16")]; + tensor var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610469056)))]; + tensor var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613745920)))]; + tensor linear_84_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = var_2397_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; + tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; + tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; + tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; + tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 20, 64])]; + tensor var_2427_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2427_cast_fp16")]; + tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2427_cast_fp16, y = const_202_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2433 = const()[name = string("op_2433"), val = tensor([1, 1500, 20, -1])]; + tensor var_2434_cast_fp16 = reshape(shape = var_2433, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2434_cast_fp16")]; + tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_109_cast_fp16 = mul(x = var_2434_cast_fp16, y = const_203_to_fp16)[name = string("k_109_cast_fp16")]; + tensor var_2440 = const()[name = string("op_2440"), val = tensor([1, 1500, 20, -1])]; + tensor var_2441_cast_fp16 = reshape(shape = var_2440, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2441_cast_fp16")]; + tensor var_2442 = const()[name = string("op_2442"), val = tensor([0, 2, 1, 3])]; + bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; + bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; + tensor transpose_299_perm_0 = const()[name = string("transpose_299_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_300_perm_0 = const()[name = string("transpose_300_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_300 = transpose(perm = transpose_300_perm_0, x = k_109_cast_fp16)[name = string("transpose_554")]; + tensor transpose_299 = transpose(perm = transpose_299_perm_0, x = q_87_cast_fp16)[name = string("transpose_555")]; + tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_299, y = transpose_300)[name = string("qk_65_cast_fp16")]; + tensor var_2446_cast_fp16 = softmax(axis = var_2290, x = qk_65_cast_fp16)[name = string("op_2446_cast_fp16")]; + bool var_2448_transpose_x_0 = const()[name = string("op_2448_transpose_x_0"), val = bool(false)]; + bool var_2448_transpose_y_0 = const()[name = string("op_2448_transpose_y_0"), val = bool(false)]; + tensor v_109_cast_fp16 = transpose(perm = var_2442, x = var_2441_cast_fp16)[name = string("transpose_556")]; + tensor var_2448_cast_fp16 = matmul(transpose_x = var_2448_transpose_x_0, transpose_y = var_2448_transpose_y_0, x = var_2446_cast_fp16, y = v_109_cast_fp16)[name = string("op_2448_cast_fp16")]; + tensor var_2449 = const()[name = string("op_2449"), val = tensor([0, 2, 1, 3])]; + tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 1280])]; + tensor var_2450_cast_fp16 = transpose(perm = var_2449, x = var_2448_cast_fp16)[name = string("transpose_553")]; + tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2450_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613748544)))]; + tensor var_2455_to_fp16 = const()[name = string("op_2455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617025408)))]; + tensor linear_85_cast_fp16 = linear(bias = var_2455_to_fp16, weight = var_2454_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_2462_axes_0 = const()[name = string("op_2462_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617028032)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617030656)))]; + tensor var_2462_cast_fp16 = layer_norm(axes = var_2462_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2296_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2462_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617033280)))]; + tensor var_2472_to_fp16 = const()[name = string("op_2472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630140544)))]; + tensor linear_86_cast_fp16 = linear(bias = var_2472_to_fp16, weight = var_2471_to_fp16, x = var_2462_cast_fp16)[name = string("linear_86_cast_fp16")]; + string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; + tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; + tensor var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630150848)))]; + tensor var_2478_to_fp16 = const()[name = string("op_2478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643258112)))]; + tensor linear_87_cast_fp16 = linear(bias = var_2478_to_fp16, weight = var_2477_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; + tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_45_cast_fp16")]; + tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 1280])]; + tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_45_cast_fp16")]; + tensor k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; + tensor k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")]; + tensor v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor([12, 1, 1500, 1280])]; + tensor v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")]; + int32 var_2501 = const()[name = string("op_2501"), val = int32(-1)]; + tensor var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643260736)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643263360)))]; + fp16 var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2519_cast_fp16 = layer_norm(axes = var_2519_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2519_cast_fp16")]; + tensor var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643265984)))]; + tensor var_2531_to_fp16 = const()[name = string("op_2531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646542848)))]; + tensor linear_88_cast_fp16 = linear(bias = var_2531_to_fp16, weight = var_2530_to_fp16, x = var_2519_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2534_to_fp16 = const()[name = string("op_2534_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646545472)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2534_to_fp16, x = var_2519_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649822336)))]; + tensor var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653099200)))]; + tensor linear_90_cast_fp16 = linear(bias = var_2539_to_fp16, weight = var_2538_to_fp16, x = var_2519_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_2541_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2541_shape_cast_fp16")]; + int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; + int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; + bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; + string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; + tensor var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_368")]; + uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; + string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_367")]; + int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")]; + tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")]; + tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; + tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; + tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; + tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; + int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; + bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; + tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; + tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")]; + tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")]; + int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; + int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1280)]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")]; + tensor var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_252, end_mask = var_2557_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2557_cast_fp16")]; + tensor var_2560_begin_0 = const()[name = string("op_2560_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2560_end_mask_0 = const()[name = string("op_2560_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2560_cast_fp16 = slice_by_index(begin = var_2560_begin_0, end = concat_252, end_mask = var_2560_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2560_cast_fp16")]; + tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 20, 64])]; + tensor var_2570_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2570_cast_fp16")]; + tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2570_cast_fp16, y = const_204_to_fp16)[name = string("q_91_cast_fp16")]; + tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 20, 64])]; + tensor var_2577_cast_fp16 = reshape(shape = concat_255x, x = var_2557_cast_fp16)[name = string("op_2577_cast_fp16")]; + tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_2577_cast_fp16, y = const_205_to_fp16)[name = string("k_115_cast_fp16")]; + tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 20, 64])]; + tensor var_2584_cast_fp16 = reshape(shape = concat_256x, x = var_2560_cast_fp16)[name = string("op_2584_cast_fp16")]; + tensor var_2585 = const()[name = string("op_2585"), val = tensor([0, 2, 1, 3])]; + bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; + bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; + tensor transpose_301_perm_0 = const()[name = string("transpose_301_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_302_perm_0 = const()[name = string("transpose_302_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_302 = transpose(perm = transpose_302_perm_0, x = k_115_cast_fp16)[name = string("transpose_550")]; + tensor transpose_301 = transpose(perm = transpose_301_perm_0, x = q_91_cast_fp16)[name = string("transpose_551")]; + tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_301, y = transpose_302)[name = string("qk_67_cast_fp16")]; + int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; + int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; + bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; + tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; + tensor var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor([0, 0])]; + tensor var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor([false, true])]; + tensor var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = concat_257, end_mask = var_2588_end_mask_0, x = mask_to_fp16)[name = string("op_2588_cast_fp16")]; + int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; + tensor var_2589_begin_0 = const()[name = string("op_2589_begin_0"), val = tensor([0, 0])]; + tensor var_2589_end_mask_0 = const()[name = string("op_2589_end_mask_0"), val = tensor([true, false])]; + tensor var_2589_cast_fp16 = slice_by_index(begin = var_2589_begin_0, end = concat_258, end_mask = var_2589_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2589_cast_fp16")]; + tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2589_cast_fp16)[name = string("qk_69_cast_fp16")]; + tensor var_2592_cast_fp16 = softmax(axis = var_2501, x = qk_69_cast_fp16)[name = string("op_2592_cast_fp16")]; + bool var_2594_transpose_x_0 = const()[name = string("op_2594_transpose_x_0"), val = bool(false)]; + bool var_2594_transpose_y_0 = const()[name = string("op_2594_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_2585, x = var_2584_cast_fp16)[name = string("transpose_552")]; + tensor var_2594_cast_fp16 = matmul(transpose_x = var_2594_transpose_x_0, transpose_y = var_2594_transpose_y_0, x = var_2592_cast_fp16, y = v_115_cast_fp16)[name = string("op_2594_cast_fp16")]; + tensor var_2595 = const()[name = string("op_2595"), val = tensor([0, 2, 1, 3])]; + tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 1280])]; + tensor var_2596_cast_fp16 = transpose(perm = var_2595, x = var_2594_cast_fp16)[name = string("transpose_549")]; + tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2596_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_2600_to_fp16 = const()[name = string("op_2600_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(653101824)))]; + tensor var_2601_to_fp16 = const()[name = string("op_2601_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656378688)))]; + tensor linear_91_cast_fp16 = linear(bias = var_2601_to_fp16, weight = var_2600_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_2608_axes_0 = const()[name = string("op_2608_axes_0"), val = tensor([-1])]; + tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656381312)))]; + tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656383936)))]; + tensor var_2608_cast_fp16 = layer_norm(axes = var_2608_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2608_cast_fp16")]; + tensor var_2617_to_fp16 = const()[name = string("op_2617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(656386560)))]; + tensor var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659663424)))]; + tensor linear_92_cast_fp16 = linear(bias = var_2618_to_fp16, weight = var_2617_to_fp16, x = var_2608_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; + tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; + tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; + tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; + tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 20, 64])]; + tensor var_2638_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2638_cast_fp16")]; + tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_95_cast_fp16 = mul(x = var_2638_cast_fp16, y = const_206_to_fp16)[name = string("q_95_cast_fp16")]; + tensor var_2644 = const()[name = string("op_2644"), val = tensor([1, 1500, 20, -1])]; + tensor var_2645_cast_fp16 = reshape(shape = var_2644, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2645_cast_fp16")]; + tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_119_cast_fp16 = mul(x = var_2645_cast_fp16, y = const_207_to_fp16)[name = string("k_119_cast_fp16")]; + tensor var_2651 = const()[name = string("op_2651"), val = tensor([1, 1500, 20, -1])]; + tensor var_2652_cast_fp16 = reshape(shape = var_2651, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2652_cast_fp16")]; + tensor var_2653 = const()[name = string("op_2653"), val = tensor([0, 2, 1, 3])]; + bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)]; + bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)]; + tensor transpose_303_perm_0 = const()[name = string("transpose_303_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_304_perm_0 = const()[name = string("transpose_304_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_304 = transpose(perm = transpose_304_perm_0, x = k_119_cast_fp16)[name = string("transpose_546")]; + tensor transpose_303 = transpose(perm = transpose_303_perm_0, x = q_95_cast_fp16)[name = string("transpose_547")]; + tensor qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_303, y = transpose_304)[name = string("qk_71_cast_fp16")]; + tensor var_2657_cast_fp16 = softmax(axis = var_2501, x = qk_71_cast_fp16)[name = string("op_2657_cast_fp16")]; + bool var_2659_transpose_x_0 = const()[name = string("op_2659_transpose_x_0"), val = bool(false)]; + bool var_2659_transpose_y_0 = const()[name = string("op_2659_transpose_y_0"), val = bool(false)]; + tensor v_119_cast_fp16 = transpose(perm = var_2653, x = var_2652_cast_fp16)[name = string("transpose_548")]; + tensor var_2659_cast_fp16 = matmul(transpose_x = var_2659_transpose_x_0, transpose_y = var_2659_transpose_y_0, x = var_2657_cast_fp16, y = v_119_cast_fp16)[name = string("op_2659_cast_fp16")]; + tensor var_2660 = const()[name = string("op_2660"), val = tensor([0, 2, 1, 3])]; + tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 1280])]; + tensor var_2661_cast_fp16 = transpose(perm = var_2660, x = var_2659_cast_fp16)[name = string("transpose_545")]; + tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2661_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_2665_to_fp16 = const()[name = string("op_2665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659666048)))]; + tensor var_2666_to_fp16 = const()[name = string("op_2666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662942912)))]; + tensor linear_93_cast_fp16 = linear(bias = var_2666_to_fp16, weight = var_2665_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_2673_axes_0 = const()[name = string("op_2673_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662945536)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662948160)))]; + tensor var_2673_cast_fp16 = layer_norm(axes = var_2673_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2507_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2673_cast_fp16")]; + tensor var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(662950784)))]; + tensor var_2683_to_fp16 = const()[name = string("op_2683_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676058048)))]; + tensor linear_94_cast_fp16 = linear(bias = var_2683_to_fp16, weight = var_2682_to_fp16, x = var_2673_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; + tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676068352)))]; + tensor var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689175616)))]; + tensor linear_95_cast_fp16 = linear(bias = var_2689_to_fp16, weight = var_2688_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; + tensor k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_49_cast_fp16")]; + tensor v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor([13, 1, 448, 1280])]; + tensor v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_49_cast_fp16")]; + tensor k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; + tensor k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")]; + tensor v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor([13, 1, 1500, 1280])]; + tensor v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")]; + int32 var_2712 = const()[name = string("op_2712"), val = int32(-1)]; + tensor var_2730_axes_0 = const()[name = string("op_2730_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689178240)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689180864)))]; + fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2730_cast_fp16 = layer_norm(axes = var_2730_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2730_cast_fp16")]; + tensor var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(689183488)))]; + tensor var_2742_to_fp16 = const()[name = string("op_2742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692460352)))]; + tensor linear_96_cast_fp16 = linear(bias = var_2742_to_fp16, weight = var_2741_to_fp16, x = var_2730_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_2745_to_fp16 = const()[name = string("op_2745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692462976)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2745_to_fp16, x = var_2730_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_2749_to_fp16 = const()[name = string("op_2749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695739840)))]; + tensor var_2750_to_fp16 = const()[name = string("op_2750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699016704)))]; + tensor linear_98_cast_fp16 = linear(bias = var_2750_to_fp16, weight = var_2749_to_fp16, x = var_2730_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_2752_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2752_shape_cast_fp16")]; + int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)]; + int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)]; + bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)]; + string var_2752_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2752_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)]; + tensor var_2752_shape_cast_fp16_to_uint16 = cast(dtype = var_2752_shape_cast_fp16_to_uint16_dtype_0, x = var_2752_shape_cast_fp16)[name = string("cast_366")]; + uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2752_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")]; + string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_365")]; + int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([0])]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")]; + tensor concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor([12])]; + int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; + bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; + tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")]; + tensor concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor([0])]; + tensor concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor([0])]; + tensor concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor([0])]; + int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; + bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; + tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")]; + tensor k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")]; + tensor v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")]; + int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)]; + int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1280)]; + int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)]; + bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)]; + tensor concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")]; + tensor var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_274, end_mask = var_2768_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2768_cast_fp16")]; + tensor var_2771_begin_0 = const()[name = string("op_2771_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2771_end_mask_0 = const()[name = string("op_2771_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = concat_274, end_mask = var_2771_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2771_cast_fp16")]; + tensor concat_276x = const()[name = string("concat_276x"), val = tensor([1, -1, 20, 64])]; + tensor var_2781_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2781_cast_fp16")]; + tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_99_cast_fp16 = mul(x = var_2781_cast_fp16, y = const_208_to_fp16)[name = string("q_99_cast_fp16")]; + tensor concat_277x = const()[name = string("concat_277x"), val = tensor([1, -1, 20, 64])]; + tensor var_2788_cast_fp16 = reshape(shape = concat_277x, x = var_2768_cast_fp16)[name = string("op_2788_cast_fp16")]; + tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_125_cast_fp16 = mul(x = var_2788_cast_fp16, y = const_209_to_fp16)[name = string("k_125_cast_fp16")]; + tensor concat_278x = const()[name = string("concat_278x"), val = tensor([1, -1, 20, 64])]; + tensor var_2795_cast_fp16 = reshape(shape = concat_278x, x = var_2771_cast_fp16)[name = string("op_2795_cast_fp16")]; + tensor var_2796 = const()[name = string("op_2796"), val = tensor([0, 2, 1, 3])]; + bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)]; + bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)]; + tensor transpose_305_perm_0 = const()[name = string("transpose_305_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_306_perm_0 = const()[name = string("transpose_306_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_306 = transpose(perm = transpose_306_perm_0, x = k_125_cast_fp16)[name = string("transpose_542")]; + tensor transpose_305 = transpose(perm = transpose_305_perm_0, x = q_99_cast_fp16)[name = string("transpose_543")]; + tensor qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_305, y = transpose_306)[name = string("qk_73_cast_fp16")]; + int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)]; + int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; + bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")]; + tensor var_2799_begin_0 = const()[name = string("op_2799_begin_0"), val = tensor([0, 0])]; + tensor var_2799_end_mask_0 = const()[name = string("op_2799_end_mask_0"), val = tensor([false, true])]; + tensor var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = concat_279, end_mask = var_2799_end_mask_0, x = mask_to_fp16)[name = string("op_2799_cast_fp16")]; + int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)]; + int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; + bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; + tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")]; + tensor var_2800_begin_0 = const()[name = string("op_2800_begin_0"), val = tensor([0, 0])]; + tensor var_2800_end_mask_0 = const()[name = string("op_2800_end_mask_0"), val = tensor([true, false])]; + tensor var_2800_cast_fp16 = slice_by_index(begin = var_2800_begin_0, end = concat_280, end_mask = var_2800_end_mask_0, x = var_2799_cast_fp16)[name = string("op_2800_cast_fp16")]; + tensor qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2800_cast_fp16)[name = string("qk_75_cast_fp16")]; + tensor var_2803_cast_fp16 = softmax(axis = var_2712, x = qk_75_cast_fp16)[name = string("op_2803_cast_fp16")]; + bool var_2805_transpose_x_0 = const()[name = string("op_2805_transpose_x_0"), val = bool(false)]; + bool var_2805_transpose_y_0 = const()[name = string("op_2805_transpose_y_0"), val = bool(false)]; + tensor v_125_cast_fp16 = transpose(perm = var_2796, x = var_2795_cast_fp16)[name = string("transpose_544")]; + tensor var_2805_cast_fp16 = matmul(transpose_x = var_2805_transpose_x_0, transpose_y = var_2805_transpose_y_0, x = var_2803_cast_fp16, y = v_125_cast_fp16)[name = string("op_2805_cast_fp16")]; + tensor var_2806 = const()[name = string("op_2806"), val = tensor([0, 2, 1, 3])]; + tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, -1, 1280])]; + tensor var_2807_cast_fp16 = transpose(perm = var_2806, x = var_2805_cast_fp16)[name = string("transpose_541")]; + tensor x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2807_cast_fp16)[name = string("x_223_cast_fp16")]; + tensor var_2811_to_fp16 = const()[name = string("op_2811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(699019328)))]; + tensor var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702296192)))]; + tensor linear_99_cast_fp16 = linear(bias = var_2812_to_fp16, weight = var_2811_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")]; + tensor var_2819_axes_0 = const()[name = string("op_2819_axes_0"), val = tensor([-1])]; + tensor blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702298816)))]; + tensor blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702301440)))]; + tensor var_2819_cast_fp16 = layer_norm(axes = var_2819_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2819_cast_fp16")]; + tensor var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702304064)))]; + tensor var_2829_to_fp16 = const()[name = string("op_2829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705580928)))]; + tensor linear_100_cast_fp16 = linear(bias = var_2829_to_fp16, weight = var_2828_to_fp16, x = var_2819_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([0, 0, 0])]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([0, 1500, 0])]; + tensor k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_284 = const()[name = string("concat_284"), val = tensor([0, 0, 0])]; + tensor concat_285 = const()[name = string("concat_285"), val = tensor([0, 1500, 0])]; + tensor v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 20, 64])]; + tensor var_2849_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2849_cast_fp16")]; + tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_103_cast_fp16 = mul(x = var_2849_cast_fp16, y = const_210_to_fp16)[name = string("q_103_cast_fp16")]; + tensor var_2855 = const()[name = string("op_2855"), val = tensor([1, 1500, 20, -1])]; + tensor var_2856_cast_fp16 = reshape(shape = var_2855, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2856_cast_fp16")]; + tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_129_cast_fp16 = mul(x = var_2856_cast_fp16, y = const_211_to_fp16)[name = string("k_129_cast_fp16")]; + tensor var_2862 = const()[name = string("op_2862"), val = tensor([1, 1500, 20, -1])]; + tensor var_2863_cast_fp16 = reshape(shape = var_2862, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2863_cast_fp16")]; + tensor var_2864 = const()[name = string("op_2864"), val = tensor([0, 2, 1, 3])]; + bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)]; + bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)]; + tensor transpose_307_perm_0 = const()[name = string("transpose_307_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_308_perm_0 = const()[name = string("transpose_308_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_308 = transpose(perm = transpose_308_perm_0, x = k_129_cast_fp16)[name = string("transpose_538")]; + tensor transpose_307 = transpose(perm = transpose_307_perm_0, x = q_103_cast_fp16)[name = string("transpose_539")]; + tensor qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_307, y = transpose_308)[name = string("qk_77_cast_fp16")]; + tensor var_2868_cast_fp16 = softmax(axis = var_2712, x = qk_77_cast_fp16)[name = string("op_2868_cast_fp16")]; + bool var_2870_transpose_x_0 = const()[name = string("op_2870_transpose_x_0"), val = bool(false)]; + bool var_2870_transpose_y_0 = const()[name = string("op_2870_transpose_y_0"), val = bool(false)]; + tensor v_129_cast_fp16 = transpose(perm = var_2864, x = var_2863_cast_fp16)[name = string("transpose_540")]; + tensor var_2870_cast_fp16 = matmul(transpose_x = var_2870_transpose_x_0, transpose_y = var_2870_transpose_y_0, x = var_2868_cast_fp16, y = v_129_cast_fp16)[name = string("op_2870_cast_fp16")]; + tensor var_2871 = const()[name = string("op_2871"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 1280])]; + tensor var_2872_cast_fp16 = transpose(perm = var_2871, x = var_2870_cast_fp16)[name = string("transpose_537")]; + tensor x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2872_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705583552)))]; + tensor var_2877_to_fp16 = const()[name = string("op_2877_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708860416)))]; + tensor linear_101_cast_fp16 = linear(bias = var_2877_to_fp16, weight = var_2876_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")]; + tensor var_2884_axes_0 = const()[name = string("op_2884_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708863040)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708865664)))]; + tensor var_2884_cast_fp16 = layer_norm(axes = var_2884_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2718_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2884_cast_fp16")]; + tensor var_2893_to_fp16 = const()[name = string("op_2893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708868288)))]; + tensor var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721975552)))]; + tensor linear_102_cast_fp16 = linear(bias = var_2894_to_fp16, weight = var_2893_to_fp16, x = var_2884_cast_fp16)[name = string("linear_102_cast_fp16")]; + string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")]; + tensor x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_2899_to_fp16 = const()[name = string("op_2899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(721985856)))]; + tensor var_2900_to_fp16 = const()[name = string("op_2900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735093120)))]; + tensor linear_103_cast_fp16 = linear(bias = var_2900_to_fp16, weight = var_2899_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; + tensor k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_53_cast_fp16")]; + tensor v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor([14, 1, 448, 1280])]; + tensor v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_53_cast_fp16")]; + tensor k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; + tensor k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")]; + tensor v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor([14, 1, 1500, 1280])]; + tensor v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")]; + int32 var_2923 = const()[name = string("op_2923"), val = int32(-1)]; + tensor var_2941_axes_0 = const()[name = string("op_2941_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735095744)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735098368)))]; + fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2941_cast_fp16 = layer_norm(axes = var_2941_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2941_cast_fp16")]; + tensor var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735100992)))]; + tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738377856)))]; + tensor linear_104_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = var_2941_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_2956_to_fp16 = const()[name = string("op_2956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738380480)))]; + tensor linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2956_to_fp16, x = var_2941_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor var_2960_to_fp16 = const()[name = string("op_2960_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(741657344)))]; + tensor var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744934208)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2961_to_fp16, weight = var_2960_to_fp16, x = var_2941_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor var_2963_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2963_shape_cast_fp16")]; + int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)]; + int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)]; + bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)]; + string var_2963_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2963_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)]; + tensor var_2963_shape_cast_fp16_to_uint16 = cast(dtype = var_2963_shape_cast_fp16_to_uint16_dtype_0, x = var_2963_shape_cast_fp16)[name = string("cast_364")]; + uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2963_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")]; + string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_363")]; + int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")]; + tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([0])]; + tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([0])]; + tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; + tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([13])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")]; + tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([0])]; + tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; + tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; + int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; + bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; + tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")]; + tensor k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")]; + tensor v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")]; + int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)]; + int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1280)]; + int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)]; + bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)]; + tensor concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")]; + tensor var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_296, end_mask = var_2979_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2979_cast_fp16")]; + tensor var_2982_begin_0 = const()[name = string("op_2982_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2982_end_mask_0 = const()[name = string("op_2982_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2982_cast_fp16 = slice_by_index(begin = var_2982_begin_0, end = concat_296, end_mask = var_2982_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2982_cast_fp16")]; + tensor concat_298x = const()[name = string("concat_298x"), val = tensor([1, -1, 20, 64])]; + tensor var_2992_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2992_cast_fp16")]; + tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_107_cast_fp16 = mul(x = var_2992_cast_fp16, y = const_212_to_fp16)[name = string("q_107_cast_fp16")]; + tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, -1, 20, 64])]; + tensor var_2999_cast_fp16 = reshape(shape = concat_299x, x = var_2979_cast_fp16)[name = string("op_2999_cast_fp16")]; + tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_135_cast_fp16 = mul(x = var_2999_cast_fp16, y = const_213_to_fp16)[name = string("k_135_cast_fp16")]; + tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 20, 64])]; + tensor var_3006_cast_fp16 = reshape(shape = concat_300x, x = var_2982_cast_fp16)[name = string("op_3006_cast_fp16")]; + tensor var_3007 = const()[name = string("op_3007"), val = tensor([0, 2, 1, 3])]; + bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)]; + bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)]; + tensor transpose_309_perm_0 = const()[name = string("transpose_309_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_310_perm_0 = const()[name = string("transpose_310_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_310 = transpose(perm = transpose_310_perm_0, x = k_135_cast_fp16)[name = string("transpose_534")]; + tensor transpose_309 = transpose(perm = transpose_309_perm_0, x = q_107_cast_fp16)[name = string("transpose_535")]; + tensor qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_309, y = transpose_310)[name = string("qk_79_cast_fp16")]; + int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)]; + int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; + bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; + tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")]; + tensor var_3010_begin_0 = const()[name = string("op_3010_begin_0"), val = tensor([0, 0])]; + tensor var_3010_end_mask_0 = const()[name = string("op_3010_end_mask_0"), val = tensor([false, true])]; + tensor var_3010_cast_fp16 = slice_by_index(begin = var_3010_begin_0, end = concat_301, end_mask = var_3010_end_mask_0, x = mask_to_fp16)[name = string("op_3010_cast_fp16")]; + int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)]; + int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; + bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; + tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")]; + tensor var_3011_begin_0 = const()[name = string("op_3011_begin_0"), val = tensor([0, 0])]; + tensor var_3011_end_mask_0 = const()[name = string("op_3011_end_mask_0"), val = tensor([true, false])]; + tensor var_3011_cast_fp16 = slice_by_index(begin = var_3011_begin_0, end = concat_302, end_mask = var_3011_end_mask_0, x = var_3010_cast_fp16)[name = string("op_3011_cast_fp16")]; + tensor qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_3011_cast_fp16)[name = string("qk_81_cast_fp16")]; + tensor var_3014_cast_fp16 = softmax(axis = var_2923, x = qk_81_cast_fp16)[name = string("op_3014_cast_fp16")]; + bool var_3016_transpose_x_0 = const()[name = string("op_3016_transpose_x_0"), val = bool(false)]; + bool var_3016_transpose_y_0 = const()[name = string("op_3016_transpose_y_0"), val = bool(false)]; + tensor v_135_cast_fp16 = transpose(perm = var_3007, x = var_3006_cast_fp16)[name = string("transpose_536")]; + tensor var_3016_cast_fp16 = matmul(transpose_x = var_3016_transpose_x_0, transpose_y = var_3016_transpose_y_0, x = var_3014_cast_fp16, y = v_135_cast_fp16)[name = string("op_3016_cast_fp16")]; + tensor var_3017 = const()[name = string("op_3017"), val = tensor([0, 2, 1, 3])]; + tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 1280])]; + tensor var_3018_cast_fp16 = transpose(perm = var_3017, x = var_3016_cast_fp16)[name = string("transpose_533")]; + tensor x_241_cast_fp16 = reshape(shape = concat_303x, x = var_3018_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(744936832)))]; + tensor var_3023_to_fp16 = const()[name = string("op_3023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748213696)))]; + tensor linear_107_cast_fp16 = linear(bias = var_3023_to_fp16, weight = var_3022_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_3030_axes_0 = const()[name = string("op_3030_axes_0"), val = tensor([-1])]; + tensor blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748216320)))]; + tensor blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748218944)))]; + tensor var_3030_cast_fp16 = layer_norm(axes = var_3030_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_3030_cast_fp16")]; + tensor var_3039_to_fp16 = const()[name = string("op_3039_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748221568)))]; + tensor var_3040_to_fp16 = const()[name = string("op_3040_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751498432)))]; + tensor linear_108_cast_fp16 = linear(bias = var_3040_to_fp16, weight = var_3039_to_fp16, x = var_3030_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor concat_304 = const()[name = string("concat_304"), val = tensor([0, 0, 0])]; + tensor concat_305 = const()[name = string("concat_305"), val = tensor([0, 1500, 0])]; + tensor k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_306 = const()[name = string("concat_306"), val = tensor([0, 0, 0])]; + tensor concat_307 = const()[name = string("concat_307"), val = tensor([0, 1500, 0])]; + tensor v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_308x = const()[name = string("concat_308x"), val = tensor([1, -1, 20, 64])]; + tensor var_3060_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3060_cast_fp16")]; + tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_111_cast_fp16 = mul(x = var_3060_cast_fp16, y = const_214_to_fp16)[name = string("q_111_cast_fp16")]; + tensor var_3066 = const()[name = string("op_3066"), val = tensor([1, 1500, 20, -1])]; + tensor var_3067_cast_fp16 = reshape(shape = var_3066, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3067_cast_fp16")]; + tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_139_cast_fp16 = mul(x = var_3067_cast_fp16, y = const_215_to_fp16)[name = string("k_139_cast_fp16")]; + tensor var_3073 = const()[name = string("op_3073"), val = tensor([1, 1500, 20, -1])]; + tensor var_3074_cast_fp16 = reshape(shape = var_3073, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3074_cast_fp16")]; + tensor var_3075 = const()[name = string("op_3075"), val = tensor([0, 2, 1, 3])]; + bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)]; + bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)]; + tensor transpose_311_perm_0 = const()[name = string("transpose_311_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_312_perm_0 = const()[name = string("transpose_312_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_312 = transpose(perm = transpose_312_perm_0, x = k_139_cast_fp16)[name = string("transpose_530")]; + tensor transpose_311 = transpose(perm = transpose_311_perm_0, x = q_111_cast_fp16)[name = string("transpose_531")]; + tensor qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_311, y = transpose_312)[name = string("qk_83_cast_fp16")]; + tensor var_3079_cast_fp16 = softmax(axis = var_2923, x = qk_83_cast_fp16)[name = string("op_3079_cast_fp16")]; + bool var_3081_transpose_x_0 = const()[name = string("op_3081_transpose_x_0"), val = bool(false)]; + bool var_3081_transpose_y_0 = const()[name = string("op_3081_transpose_y_0"), val = bool(false)]; + tensor v_139_cast_fp16 = transpose(perm = var_3075, x = var_3074_cast_fp16)[name = string("transpose_532")]; + tensor var_3081_cast_fp16 = matmul(transpose_x = var_3081_transpose_x_0, transpose_y = var_3081_transpose_y_0, x = var_3079_cast_fp16, y = v_139_cast_fp16)[name = string("op_3081_cast_fp16")]; + tensor var_3082 = const()[name = string("op_3082"), val = tensor([0, 2, 1, 3])]; + tensor concat_309x = const()[name = string("concat_309x"), val = tensor([1, -1, 1280])]; + tensor var_3083_cast_fp16 = transpose(perm = var_3082, x = var_3081_cast_fp16)[name = string("transpose_529")]; + tensor x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3083_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor var_3087_to_fp16 = const()[name = string("op_3087_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751501056)))]; + tensor var_3088_to_fp16 = const()[name = string("op_3088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754777920)))]; + tensor linear_109_cast_fp16 = linear(bias = var_3088_to_fp16, weight = var_3087_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")]; + tensor var_3095_axes_0 = const()[name = string("op_3095_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754780544)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754783168)))]; + tensor var_3095_cast_fp16 = layer_norm(axes = var_3095_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2929_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3095_cast_fp16")]; + tensor var_3104_to_fp16 = const()[name = string("op_3104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(754785792)))]; + tensor var_3105_to_fp16 = const()[name = string("op_3105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767893056)))]; + tensor linear_110_cast_fp16 = linear(bias = var_3105_to_fp16, weight = var_3104_to_fp16, x = var_3095_cast_fp16)[name = string("linear_110_cast_fp16")]; + string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")]; + tensor x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_3110_to_fp16 = const()[name = string("op_3110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767903360)))]; + tensor var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781010624)))]; + tensor linear_111_cast_fp16 = linear(bias = var_3111_to_fp16, weight = var_3110_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")]; + tensor k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; + tensor k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_57_cast_fp16")]; + tensor v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor([15, 1, 448, 1280])]; + tensor v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_57_cast_fp16")]; + tensor k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; + tensor k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")]; + tensor v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor([15, 1, 1500, 1280])]; + tensor v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")]; + int32 var_3134 = const()[name = string("op_3134"), val = int32(-1)]; + tensor var_3152_axes_0 = const()[name = string("op_3152_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781013248)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781015872)))]; + fp16 var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3152_cast_fp16 = layer_norm(axes = var_3152_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3152_cast_fp16")]; + tensor var_3163_to_fp16 = const()[name = string("op_3163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(781018496)))]; + tensor var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784295360)))]; + tensor linear_112_cast_fp16 = linear(bias = var_3164_to_fp16, weight = var_3163_to_fp16, x = var_3152_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor var_3167_to_fp16 = const()[name = string("op_3167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784297984)))]; + tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3167_to_fp16, x = var_3152_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787574848)))]; + tensor var_3172_to_fp16 = const()[name = string("op_3172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790851712)))]; + tensor linear_114_cast_fp16 = linear(bias = var_3172_to_fp16, weight = var_3171_to_fp16, x = var_3152_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_3174_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3174_shape_cast_fp16")]; + int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)]; + int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)]; + bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)]; + string var_3174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)]; + tensor var_3174_shape_cast_fp16_to_uint16 = cast(dtype = var_3174_shape_cast_fp16_to_uint16_dtype_0, x = var_3174_shape_cast_fp16)[name = string("cast_362")]; + uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3174_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")]; + string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_361")]; + int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")]; + tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([0])]; + tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; + tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")]; + tensor concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor([14])]; + int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; + bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; + tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")]; + tensor concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor([0])]; + tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; + tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; + int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; + bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; + tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")]; + tensor k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")]; + tensor v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")]; + int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)]; + int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1280)]; + int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; + bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; + tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")]; + tensor var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_318, end_mask = var_3190_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3190_cast_fp16")]; + tensor var_3193_begin_0 = const()[name = string("op_3193_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3193_end_mask_0 = const()[name = string("op_3193_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3193_cast_fp16 = slice_by_index(begin = var_3193_begin_0, end = concat_318, end_mask = var_3193_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3193_cast_fp16")]; + tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, -1, 20, 64])]; + tensor var_3203_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3203_cast_fp16")]; + tensor const_216_to_fp16 = const()[name = string("const_216_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_115_cast_fp16 = mul(x = var_3203_cast_fp16, y = const_216_to_fp16)[name = string("q_115_cast_fp16")]; + tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, -1, 20, 64])]; + tensor var_3210_cast_fp16 = reshape(shape = concat_321x, x = var_3190_cast_fp16)[name = string("op_3210_cast_fp16")]; + tensor const_217_to_fp16 = const()[name = string("const_217_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_145_cast_fp16 = mul(x = var_3210_cast_fp16, y = const_217_to_fp16)[name = string("k_145_cast_fp16")]; + tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 20, 64])]; + tensor var_3217_cast_fp16 = reshape(shape = concat_322x, x = var_3193_cast_fp16)[name = string("op_3217_cast_fp16")]; + tensor var_3218 = const()[name = string("op_3218"), val = tensor([0, 2, 1, 3])]; + bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)]; + bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)]; + tensor transpose_313_perm_0 = const()[name = string("transpose_313_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_314_perm_0 = const()[name = string("transpose_314_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_314 = transpose(perm = transpose_314_perm_0, x = k_145_cast_fp16)[name = string("transpose_526")]; + tensor transpose_313 = transpose(perm = transpose_313_perm_0, x = q_115_cast_fp16)[name = string("transpose_527")]; + tensor qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_313, y = transpose_314)[name = string("qk_85_cast_fp16")]; + int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)]; + int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; + bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; + tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")]; + tensor var_3221_begin_0 = const()[name = string("op_3221_begin_0"), val = tensor([0, 0])]; + tensor var_3221_end_mask_0 = const()[name = string("op_3221_end_mask_0"), val = tensor([false, true])]; + tensor var_3221_cast_fp16 = slice_by_index(begin = var_3221_begin_0, end = concat_323, end_mask = var_3221_end_mask_0, x = mask_to_fp16)[name = string("op_3221_cast_fp16")]; + int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)]; + int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)]; + bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)]; + tensor concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")]; + tensor var_3222_begin_0 = const()[name = string("op_3222_begin_0"), val = tensor([0, 0])]; + tensor var_3222_end_mask_0 = const()[name = string("op_3222_end_mask_0"), val = tensor([true, false])]; + tensor var_3222_cast_fp16 = slice_by_index(begin = var_3222_begin_0, end = concat_324, end_mask = var_3222_end_mask_0, x = var_3221_cast_fp16)[name = string("op_3222_cast_fp16")]; + tensor qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3222_cast_fp16)[name = string("qk_87_cast_fp16")]; + tensor var_3225_cast_fp16 = softmax(axis = var_3134, x = qk_87_cast_fp16)[name = string("op_3225_cast_fp16")]; + bool var_3227_transpose_x_0 = const()[name = string("op_3227_transpose_x_0"), val = bool(false)]; + bool var_3227_transpose_y_0 = const()[name = string("op_3227_transpose_y_0"), val = bool(false)]; + tensor v_145_cast_fp16 = transpose(perm = var_3218, x = var_3217_cast_fp16)[name = string("transpose_528")]; + tensor var_3227_cast_fp16 = matmul(transpose_x = var_3227_transpose_x_0, transpose_y = var_3227_transpose_y_0, x = var_3225_cast_fp16, y = v_145_cast_fp16)[name = string("op_3227_cast_fp16")]; + tensor var_3228 = const()[name = string("op_3228"), val = tensor([0, 2, 1, 3])]; + tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 1280])]; + tensor var_3229_cast_fp16 = transpose(perm = var_3228, x = var_3227_cast_fp16)[name = string("transpose_525")]; + tensor x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3229_cast_fp16)[name = string("x_259_cast_fp16")]; + tensor var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790854336)))]; + tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794131200)))]; + tensor linear_115_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_3241_axes_0 = const()[name = string("op_3241_axes_0"), val = tensor([-1])]; + tensor blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794133824)))]; + tensor blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794136448)))]; + tensor var_3241_cast_fp16 = layer_norm(axes = var_3241_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3241_cast_fp16")]; + tensor var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(794139072)))]; + tensor var_3251_to_fp16 = const()[name = string("op_3251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797415936)))]; + tensor linear_116_cast_fp16 = linear(bias = var_3251_to_fp16, weight = var_3250_to_fp16, x = var_3241_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor concat_326 = const()[name = string("concat_326"), val = tensor([0, 0, 0])]; + tensor concat_327 = const()[name = string("concat_327"), val = tensor([0, 1500, 0])]; + tensor k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_328 = const()[name = string("concat_328"), val = tensor([0, 0, 0])]; + tensor concat_329 = const()[name = string("concat_329"), val = tensor([0, 1500, 0])]; + tensor v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 20, 64])]; + tensor var_3271_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3271_cast_fp16")]; + tensor const_218_to_fp16 = const()[name = string("const_218_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_119_cast_fp16 = mul(x = var_3271_cast_fp16, y = const_218_to_fp16)[name = string("q_119_cast_fp16")]; + tensor var_3277 = const()[name = string("op_3277"), val = tensor([1, 1500, 20, -1])]; + tensor var_3278_cast_fp16 = reshape(shape = var_3277, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor const_219_to_fp16 = const()[name = string("const_219_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_149_cast_fp16 = mul(x = var_3278_cast_fp16, y = const_219_to_fp16)[name = string("k_149_cast_fp16")]; + tensor var_3284 = const()[name = string("op_3284"), val = tensor([1, 1500, 20, -1])]; + tensor var_3285_cast_fp16 = reshape(shape = var_3284, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3285_cast_fp16")]; + tensor var_3286 = const()[name = string("op_3286"), val = tensor([0, 2, 1, 3])]; + bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)]; + bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)]; + tensor transpose_315_perm_0 = const()[name = string("transpose_315_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_316_perm_0 = const()[name = string("transpose_316_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_316 = transpose(perm = transpose_316_perm_0, x = k_149_cast_fp16)[name = string("transpose_522")]; + tensor transpose_315 = transpose(perm = transpose_315_perm_0, x = q_119_cast_fp16)[name = string("transpose_523")]; + tensor qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_315, y = transpose_316)[name = string("qk_89_cast_fp16")]; + tensor var_3290_cast_fp16 = softmax(axis = var_3134, x = qk_89_cast_fp16)[name = string("op_3290_cast_fp16")]; + bool var_3292_transpose_x_0 = const()[name = string("op_3292_transpose_x_0"), val = bool(false)]; + bool var_3292_transpose_y_0 = const()[name = string("op_3292_transpose_y_0"), val = bool(false)]; + tensor v_149_cast_fp16 = transpose(perm = var_3286, x = var_3285_cast_fp16)[name = string("transpose_524")]; + tensor var_3292_cast_fp16 = matmul(transpose_x = var_3292_transpose_x_0, transpose_y = var_3292_transpose_y_0, x = var_3290_cast_fp16, y = v_149_cast_fp16)[name = string("op_3292_cast_fp16")]; + tensor var_3293 = const()[name = string("op_3293"), val = tensor([0, 2, 1, 3])]; + tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 1280])]; + tensor var_3294_cast_fp16 = transpose(perm = var_3293, x = var_3292_cast_fp16)[name = string("transpose_521")]; + tensor x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3294_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_3298_to_fp16 = const()[name = string("op_3298_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797418560)))]; + tensor var_3299_to_fp16 = const()[name = string("op_3299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800695424)))]; + tensor linear_117_cast_fp16 = linear(bias = var_3299_to_fp16, weight = var_3298_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")]; + tensor var_3306_axes_0 = const()[name = string("op_3306_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800698048)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800700672)))]; + tensor var_3306_cast_fp16 = layer_norm(axes = var_3306_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3140_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3306_cast_fp16")]; + tensor var_3315_to_fp16 = const()[name = string("op_3315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800703296)))]; + tensor var_3316_to_fp16 = const()[name = string("op_3316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813810560)))]; + tensor linear_118_cast_fp16 = linear(bias = var_3316_to_fp16, weight = var_3315_to_fp16, x = var_3306_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")]; + tensor x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")]; + tensor var_3321_to_fp16 = const()[name = string("op_3321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(813820864)))]; + tensor var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826928128)))]; + tensor linear_119_cast_fp16 = linear(bias = var_3322_to_fp16, weight = var_3321_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")]; + tensor k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; + tensor k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_61_cast_fp16")]; + tensor v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor([16, 1, 448, 1280])]; + tensor v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_61_cast_fp16")]; + tensor k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; + tensor k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")]; + tensor v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor([16, 1, 1500, 1280])]; + tensor v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")]; + int32 var_3345 = const()[name = string("op_3345"), val = int32(-1)]; + tensor var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826930752)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826933376)))]; + fp16 var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3363_cast_fp16 = layer_norm(axes = var_3363_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3363_cast_fp16")]; + tensor var_3374_to_fp16 = const()[name = string("op_3374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826936000)))]; + tensor var_3375_to_fp16 = const()[name = string("op_3375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830212864)))]; + tensor linear_120_cast_fp16 = linear(bias = var_3375_to_fp16, weight = var_3374_to_fp16, x = var_3363_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_3378_to_fp16 = const()[name = string("op_3378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(830215488)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3378_to_fp16, x = var_3363_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_3382_to_fp16 = const()[name = string("op_3382_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833492352)))]; + tensor var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836769216)))]; + tensor linear_122_cast_fp16 = linear(bias = var_3383_to_fp16, weight = var_3382_to_fp16, x = var_3363_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_3385_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3385_shape_cast_fp16")]; + int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; + int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; + bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; + string var_3385_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3385_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)]; + tensor var_3385_shape_cast_fp16_to_uint16 = cast(dtype = var_3385_shape_cast_fp16_to_uint16_dtype_0, x = var_3385_shape_cast_fp16)[name = string("cast_360")]; + uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3385_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; + string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_359")]; + int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([0])]; + tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; + tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")]; + tensor concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor([15])]; + int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)]; + bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)]; + tensor concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")]; + tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([0])]; + tensor concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor([0])]; + tensor concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor([0])]; + int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; + bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; + tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")]; + tensor k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = k_cache1)[name = string("coreml_update_state_94")]; + tensor v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = v_cache1)[name = string("coreml_update_state_95")]; + int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; + int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1280)]; + int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; + bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; + tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")]; + tensor var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_340, end_mask = var_3401_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3401_cast_fp16")]; + tensor var_3404_begin_0 = const()[name = string("op_3404_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3404_end_mask_0 = const()[name = string("op_3404_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3404_cast_fp16 = slice_by_index(begin = var_3404_begin_0, end = concat_340, end_mask = var_3404_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3404_cast_fp16")]; + tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 20, 64])]; + tensor var_3414_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3414_cast_fp16")]; + tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_123_cast_fp16 = mul(x = var_3414_cast_fp16, y = const_220_to_fp16)[name = string("q_123_cast_fp16")]; + tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 20, 64])]; + tensor var_3421_cast_fp16 = reshape(shape = concat_343x, x = var_3401_cast_fp16)[name = string("op_3421_cast_fp16")]; + tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_155_cast_fp16 = mul(x = var_3421_cast_fp16, y = const_221_to_fp16)[name = string("k_155_cast_fp16")]; + tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 20, 64])]; + tensor var_3428_cast_fp16 = reshape(shape = concat_344x, x = var_3404_cast_fp16)[name = string("op_3428_cast_fp16")]; + tensor var_3429 = const()[name = string("op_3429"), val = tensor([0, 2, 1, 3])]; + bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)]; + bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)]; + tensor transpose_317_perm_0 = const()[name = string("transpose_317_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_318_perm_0 = const()[name = string("transpose_318_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_318 = transpose(perm = transpose_318_perm_0, x = k_155_cast_fp16)[name = string("transpose_518")]; + tensor transpose_317 = transpose(perm = transpose_317_perm_0, x = q_123_cast_fp16)[name = string("transpose_519")]; + tensor qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_317, y = transpose_318)[name = string("qk_91_cast_fp16")]; + int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)]; + int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; + bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; + tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")]; + tensor var_3432_begin_0 = const()[name = string("op_3432_begin_0"), val = tensor([0, 0])]; + tensor var_3432_end_mask_0 = const()[name = string("op_3432_end_mask_0"), val = tensor([false, true])]; + tensor var_3432_cast_fp16 = slice_by_index(begin = var_3432_begin_0, end = concat_345, end_mask = var_3432_end_mask_0, x = mask_to_fp16)[name = string("op_3432_cast_fp16")]; + int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)]; + int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)]; + bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)]; + tensor concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")]; + tensor var_3433_begin_0 = const()[name = string("op_3433_begin_0"), val = tensor([0, 0])]; + tensor var_3433_end_mask_0 = const()[name = string("op_3433_end_mask_0"), val = tensor([true, false])]; + tensor var_3433_cast_fp16 = slice_by_index(begin = var_3433_begin_0, end = concat_346, end_mask = var_3433_end_mask_0, x = var_3432_cast_fp16)[name = string("op_3433_cast_fp16")]; + tensor qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3433_cast_fp16)[name = string("qk_93_cast_fp16")]; + tensor var_3436_cast_fp16 = softmax(axis = var_3345, x = qk_93_cast_fp16)[name = string("op_3436_cast_fp16")]; + bool var_3438_transpose_x_0 = const()[name = string("op_3438_transpose_x_0"), val = bool(false)]; + bool var_3438_transpose_y_0 = const()[name = string("op_3438_transpose_y_0"), val = bool(false)]; + tensor v_155_cast_fp16 = transpose(perm = var_3429, x = var_3428_cast_fp16)[name = string("transpose_520")]; + tensor var_3438_cast_fp16 = matmul(transpose_x = var_3438_transpose_x_0, transpose_y = var_3438_transpose_y_0, x = var_3436_cast_fp16, y = v_155_cast_fp16)[name = string("op_3438_cast_fp16")]; + tensor var_3439 = const()[name = string("op_3439"), val = tensor([0, 2, 1, 3])]; + tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 1280])]; + tensor var_3440_cast_fp16 = transpose(perm = var_3439, x = var_3438_cast_fp16)[name = string("transpose_517")]; + tensor x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3440_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_3444_to_fp16 = const()[name = string("op_3444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(836771840)))]; + tensor var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840048704)))]; + tensor linear_123_cast_fp16 = linear(bias = var_3445_to_fp16, weight = var_3444_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")]; + tensor var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor([-1])]; + tensor blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840051328)))]; + tensor blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840053952)))]; + tensor var_3452_cast_fp16 = layer_norm(axes = var_3452_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3452_cast_fp16")]; + tensor var_3461_to_fp16 = const()[name = string("op_3461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(840056576)))]; + tensor var_3462_to_fp16 = const()[name = string("op_3462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843333440)))]; + tensor linear_124_cast_fp16 = linear(bias = var_3462_to_fp16, weight = var_3461_to_fp16, x = var_3452_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor concat_348 = const()[name = string("concat_348"), val = tensor([0, 0, 0])]; + tensor concat_349 = const()[name = string("concat_349"), val = tensor([0, 1500, 0])]; + tensor k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_350 = const()[name = string("concat_350"), val = tensor([0, 0, 0])]; + tensor concat_351 = const()[name = string("concat_351"), val = tensor([0, 1500, 0])]; + tensor v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_352x = const()[name = string("concat_352x"), val = tensor([1, -1, 20, 64])]; + tensor var_3482_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3482_cast_fp16")]; + tensor const_222_to_fp16 = const()[name = string("const_222_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_127_cast_fp16 = mul(x = var_3482_cast_fp16, y = const_222_to_fp16)[name = string("q_127_cast_fp16")]; + tensor var_3488 = const()[name = string("op_3488"), val = tensor([1, 1500, 20, -1])]; + tensor var_3489_cast_fp16 = reshape(shape = var_3488, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3489_cast_fp16")]; + tensor const_223_to_fp16 = const()[name = string("const_223_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_159_cast_fp16 = mul(x = var_3489_cast_fp16, y = const_223_to_fp16)[name = string("k_159_cast_fp16")]; + tensor var_3495 = const()[name = string("op_3495"), val = tensor([1, 1500, 20, -1])]; + tensor var_3496_cast_fp16 = reshape(shape = var_3495, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3496_cast_fp16")]; + tensor var_3497 = const()[name = string("op_3497"), val = tensor([0, 2, 1, 3])]; + bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)]; + bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)]; + tensor transpose_319_perm_0 = const()[name = string("transpose_319_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_320_perm_0 = const()[name = string("transpose_320_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_320 = transpose(perm = transpose_320_perm_0, x = k_159_cast_fp16)[name = string("transpose_514")]; + tensor transpose_319 = transpose(perm = transpose_319_perm_0, x = q_127_cast_fp16)[name = string("transpose_515")]; + tensor qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_319, y = transpose_320)[name = string("qk_95_cast_fp16")]; + tensor var_3501_cast_fp16 = softmax(axis = var_3345, x = qk_95_cast_fp16)[name = string("op_3501_cast_fp16")]; + bool var_3503_transpose_x_0 = const()[name = string("op_3503_transpose_x_0"), val = bool(false)]; + bool var_3503_transpose_y_0 = const()[name = string("op_3503_transpose_y_0"), val = bool(false)]; + tensor v_159_cast_fp16 = transpose(perm = var_3497, x = var_3496_cast_fp16)[name = string("transpose_516")]; + tensor var_3503_cast_fp16 = matmul(transpose_x = var_3503_transpose_x_0, transpose_y = var_3503_transpose_y_0, x = var_3501_cast_fp16, y = v_159_cast_fp16)[name = string("op_3503_cast_fp16")]; + tensor var_3504 = const()[name = string("op_3504"), val = tensor([0, 2, 1, 3])]; + tensor concat_353x = const()[name = string("concat_353x"), val = tensor([1, -1, 1280])]; + tensor var_3505_cast_fp16 = transpose(perm = var_3504, x = var_3503_cast_fp16)[name = string("transpose_513")]; + tensor x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3505_cast_fp16)[name = string("x_283_cast_fp16")]; + tensor var_3509_to_fp16 = const()[name = string("op_3509_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(843336064)))]; + tensor var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846612928)))]; + tensor linear_125_cast_fp16 = linear(bias = var_3510_to_fp16, weight = var_3509_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")]; + tensor var_3517_axes_0 = const()[name = string("op_3517_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846615552)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846618176)))]; + tensor var_3517_cast_fp16 = layer_norm(axes = var_3517_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3351_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3517_cast_fp16")]; + tensor var_3526_to_fp16 = const()[name = string("op_3526_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846620800)))]; + tensor var_3527_to_fp16 = const()[name = string("op_3527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859728064)))]; + tensor linear_126_cast_fp16 = linear(bias = var_3527_to_fp16, weight = var_3526_to_fp16, x = var_3517_cast_fp16)[name = string("linear_126_cast_fp16")]; + string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")]; + tensor x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_3532_to_fp16 = const()[name = string("op_3532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(859738368)))]; + tensor var_3533_to_fp16 = const()[name = string("op_3533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872845632)))]; + tensor linear_127_cast_fp16 = linear(bias = var_3533_to_fp16, weight = var_3532_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")]; + tensor k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; + tensor k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_94)[name = string("k_cache_65_cast_fp16")]; + tensor v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor([17, 1, 448, 1280])]; + tensor v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_95)[name = string("v_cache_65_cast_fp16")]; + tensor k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; + tensor k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")]; + tensor v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor([17, 1, 1500, 1280])]; + tensor v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")]; + int32 var_3556 = const()[name = string("op_3556"), val = int32(-1)]; + tensor var_3574_axes_0 = const()[name = string("op_3574_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872848256)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872850880)))]; + fp16 var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3574_cast_fp16 = layer_norm(axes = var_3574_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3574_cast_fp16")]; + tensor var_3585_to_fp16 = const()[name = string("op_3585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(872853504)))]; + tensor var_3586_to_fp16 = const()[name = string("op_3586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876130368)))]; + tensor linear_128_cast_fp16 = linear(bias = var_3586_to_fp16, weight = var_3585_to_fp16, x = var_3574_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_3589_to_fp16 = const()[name = string("op_3589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(876132992)))]; + tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3589_to_fp16, x = var_3574_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor var_3593_to_fp16 = const()[name = string("op_3593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(879409856)))]; + tensor var_3594_to_fp16 = const()[name = string("op_3594_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882686720)))]; + tensor linear_130_cast_fp16 = linear(bias = var_3594_to_fp16, weight = var_3593_to_fp16, x = var_3574_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_3596_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3596_shape_cast_fp16")]; + int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)]; + int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)]; + bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)]; + string var_3596_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3596_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)]; + tensor var_3596_shape_cast_fp16_to_uint16 = cast(dtype = var_3596_shape_cast_fp16_to_uint16_dtype_0, x = var_3596_shape_cast_fp16)[name = string("cast_358")]; + uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3596_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")]; + string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_357")]; + int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([0])]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([0])]; + tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; + tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")]; + tensor concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor([16])]; + int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; + bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; + tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")]; + tensor concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor([0])]; + tensor concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor([0])]; + tensor concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor([0])]; + int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; + bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; + tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")]; + tensor k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_94)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = k_cache1)[name = string("coreml_update_state_96")]; + tensor v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_95)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = v_cache1)[name = string("coreml_update_state_97")]; + int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)]; + int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1280)]; + int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; + bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; + tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")]; + tensor var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_362, end_mask = var_3612_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3612_cast_fp16")]; + tensor var_3615_begin_0 = const()[name = string("op_3615_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3615_end_mask_0 = const()[name = string("op_3615_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3615_cast_fp16 = slice_by_index(begin = var_3615_begin_0, end = concat_362, end_mask = var_3615_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3615_cast_fp16")]; + tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 20, 64])]; + tensor var_3625_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3625_cast_fp16")]; + tensor const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_131_cast_fp16 = mul(x = var_3625_cast_fp16, y = const_224_to_fp16)[name = string("q_131_cast_fp16")]; + tensor concat_365x = const()[name = string("concat_365x"), val = tensor([1, -1, 20, 64])]; + tensor var_3632_cast_fp16 = reshape(shape = concat_365x, x = var_3612_cast_fp16)[name = string("op_3632_cast_fp16")]; + tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_165_cast_fp16 = mul(x = var_3632_cast_fp16, y = const_225_to_fp16)[name = string("k_165_cast_fp16")]; + tensor concat_366x = const()[name = string("concat_366x"), val = tensor([1, -1, 20, 64])]; + tensor var_3639_cast_fp16 = reshape(shape = concat_366x, x = var_3615_cast_fp16)[name = string("op_3639_cast_fp16")]; + tensor var_3640 = const()[name = string("op_3640"), val = tensor([0, 2, 1, 3])]; + bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)]; + bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)]; + tensor transpose_321_perm_0 = const()[name = string("transpose_321_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_322_perm_0 = const()[name = string("transpose_322_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_322 = transpose(perm = transpose_322_perm_0, x = k_165_cast_fp16)[name = string("transpose_510")]; + tensor transpose_321 = transpose(perm = transpose_321_perm_0, x = q_131_cast_fp16)[name = string("transpose_511")]; + tensor qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_321, y = transpose_322)[name = string("qk_97_cast_fp16")]; + int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)]; + int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; + bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; + tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")]; + tensor var_3643_begin_0 = const()[name = string("op_3643_begin_0"), val = tensor([0, 0])]; + tensor var_3643_end_mask_0 = const()[name = string("op_3643_end_mask_0"), val = tensor([false, true])]; + tensor var_3643_cast_fp16 = slice_by_index(begin = var_3643_begin_0, end = concat_367, end_mask = var_3643_end_mask_0, x = mask_to_fp16)[name = string("op_3643_cast_fp16")]; + int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)]; + int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)]; + bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)]; + tensor concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")]; + tensor var_3644_begin_0 = const()[name = string("op_3644_begin_0"), val = tensor([0, 0])]; + tensor var_3644_end_mask_0 = const()[name = string("op_3644_end_mask_0"), val = tensor([true, false])]; + tensor var_3644_cast_fp16 = slice_by_index(begin = var_3644_begin_0, end = concat_368, end_mask = var_3644_end_mask_0, x = var_3643_cast_fp16)[name = string("op_3644_cast_fp16")]; + tensor qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3644_cast_fp16)[name = string("qk_99_cast_fp16")]; + tensor var_3647_cast_fp16 = softmax(axis = var_3556, x = qk_99_cast_fp16)[name = string("op_3647_cast_fp16")]; + bool var_3649_transpose_x_0 = const()[name = string("op_3649_transpose_x_0"), val = bool(false)]; + bool var_3649_transpose_y_0 = const()[name = string("op_3649_transpose_y_0"), val = bool(false)]; + tensor v_165_cast_fp16 = transpose(perm = var_3640, x = var_3639_cast_fp16)[name = string("transpose_512")]; + tensor var_3649_cast_fp16 = matmul(transpose_x = var_3649_transpose_x_0, transpose_y = var_3649_transpose_y_0, x = var_3647_cast_fp16, y = v_165_cast_fp16)[name = string("op_3649_cast_fp16")]; + tensor var_3650 = const()[name = string("op_3650"), val = tensor([0, 2, 1, 3])]; + tensor concat_369x = const()[name = string("concat_369x"), val = tensor([1, -1, 1280])]; + tensor var_3651_cast_fp16 = transpose(perm = var_3650, x = var_3649_cast_fp16)[name = string("transpose_509")]; + tensor x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3651_cast_fp16)[name = string("x_295_cast_fp16")]; + tensor var_3655_to_fp16 = const()[name = string("op_3655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(882689344)))]; + tensor var_3656_to_fp16 = const()[name = string("op_3656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885966208)))]; + tensor linear_131_cast_fp16 = linear(bias = var_3656_to_fp16, weight = var_3655_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")]; + tensor var_3663_axes_0 = const()[name = string("op_3663_axes_0"), val = tensor([-1])]; + tensor blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885968832)))]; + tensor blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885971456)))]; + tensor var_3663_cast_fp16 = layer_norm(axes = var_3663_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3663_cast_fp16")]; + tensor var_3672_to_fp16 = const()[name = string("op_3672_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885974080)))]; + tensor var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889250944)))]; + tensor linear_132_cast_fp16 = linear(bias = var_3673_to_fp16, weight = var_3672_to_fp16, x = var_3663_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor concat_370 = const()[name = string("concat_370"), val = tensor([0, 0, 0])]; + tensor concat_371 = const()[name = string("concat_371"), val = tensor([0, 1500, 0])]; + tensor k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_372 = const()[name = string("concat_372"), val = tensor([0, 0, 0])]; + tensor concat_373 = const()[name = string("concat_373"), val = tensor([0, 1500, 0])]; + tensor v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_374x = const()[name = string("concat_374x"), val = tensor([1, -1, 20, 64])]; + tensor var_3693_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3693_cast_fp16")]; + tensor const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_135_cast_fp16 = mul(x = var_3693_cast_fp16, y = const_226_to_fp16)[name = string("q_135_cast_fp16")]; + tensor var_3699 = const()[name = string("op_3699"), val = tensor([1, 1500, 20, -1])]; + tensor var_3700_cast_fp16 = reshape(shape = var_3699, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3700_cast_fp16")]; + tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_169_cast_fp16 = mul(x = var_3700_cast_fp16, y = const_227_to_fp16)[name = string("k_169_cast_fp16")]; + tensor var_3706 = const()[name = string("op_3706"), val = tensor([1, 1500, 20, -1])]; + tensor var_3707_cast_fp16 = reshape(shape = var_3706, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3707_cast_fp16")]; + tensor var_3708 = const()[name = string("op_3708"), val = tensor([0, 2, 1, 3])]; + bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)]; + bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)]; + tensor transpose_323_perm_0 = const()[name = string("transpose_323_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_324_perm_0 = const()[name = string("transpose_324_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_324 = transpose(perm = transpose_324_perm_0, x = k_169_cast_fp16)[name = string("transpose_506")]; + tensor transpose_323 = transpose(perm = transpose_323_perm_0, x = q_135_cast_fp16)[name = string("transpose_507")]; + tensor qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_323, y = transpose_324)[name = string("qk_101_cast_fp16")]; + tensor var_3712_cast_fp16 = softmax(axis = var_3556, x = qk_101_cast_fp16)[name = string("op_3712_cast_fp16")]; + bool var_3714_transpose_x_0 = const()[name = string("op_3714_transpose_x_0"), val = bool(false)]; + bool var_3714_transpose_y_0 = const()[name = string("op_3714_transpose_y_0"), val = bool(false)]; + tensor v_169_cast_fp16 = transpose(perm = var_3708, x = var_3707_cast_fp16)[name = string("transpose_508")]; + tensor var_3714_cast_fp16 = matmul(transpose_x = var_3714_transpose_x_0, transpose_y = var_3714_transpose_y_0, x = var_3712_cast_fp16, y = v_169_cast_fp16)[name = string("op_3714_cast_fp16")]; + tensor var_3715 = const()[name = string("op_3715"), val = tensor([0, 2, 1, 3])]; + tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, -1, 1280])]; + tensor var_3716_cast_fp16 = transpose(perm = var_3715, x = var_3714_cast_fp16)[name = string("transpose_505")]; + tensor x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3716_cast_fp16)[name = string("x_301_cast_fp16")]; + tensor var_3720_to_fp16 = const()[name = string("op_3720_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(889253568)))]; + tensor var_3721_to_fp16 = const()[name = string("op_3721_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892530432)))]; + tensor linear_133_cast_fp16 = linear(bias = var_3721_to_fp16, weight = var_3720_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")]; + tensor var_3728_axes_0 = const()[name = string("op_3728_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892533056)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892535680)))]; + tensor var_3728_cast_fp16 = layer_norm(axes = var_3728_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3562_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3728_cast_fp16")]; + tensor var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(892538304)))]; + tensor var_3738_to_fp16 = const()[name = string("op_3738_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905645568)))]; + tensor linear_134_cast_fp16 = linear(bias = var_3738_to_fp16, weight = var_3737_to_fp16, x = var_3728_cast_fp16)[name = string("linear_134_cast_fp16")]; + string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")]; + tensor x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")]; + tensor var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(905655872)))]; + tensor var_3744_to_fp16 = const()[name = string("op_3744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918763136)))]; + tensor linear_135_cast_fp16 = linear(bias = var_3744_to_fp16, weight = var_3743_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; + tensor k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_96)[name = string("k_cache_69_cast_fp16")]; + tensor v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor([18, 1, 448, 1280])]; + tensor v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_97)[name = string("v_cache_69_cast_fp16")]; + tensor k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; + tensor k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")]; + tensor v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor([18, 1, 1500, 1280])]; + tensor v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")]; + int32 var_3767 = const()[name = string("op_3767"), val = int32(-1)]; + tensor var_3785_axes_0 = const()[name = string("op_3785_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918765760)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918768384)))]; + fp16 var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3785_cast_fp16 = layer_norm(axes = var_3785_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3785_cast_fp16")]; + tensor var_3796_to_fp16 = const()[name = string("op_3796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(918771008)))]; + tensor var_3797_to_fp16 = const()[name = string("op_3797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922047872)))]; + tensor linear_136_cast_fp16 = linear(bias = var_3797_to_fp16, weight = var_3796_to_fp16, x = var_3785_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor var_3800_to_fp16 = const()[name = string("op_3800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(922050496)))]; + tensor linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3800_to_fp16, x = var_3785_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3804_to_fp16 = const()[name = string("op_3804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(925327360)))]; + tensor var_3805_to_fp16 = const()[name = string("op_3805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928604224)))]; + tensor linear_138_cast_fp16 = linear(bias = var_3805_to_fp16, weight = var_3804_to_fp16, x = var_3785_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_3807_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3807_shape_cast_fp16")]; + int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; + int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; + bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; + string var_3807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)]; + tensor var_3807_shape_cast_fp16_to_uint16 = cast(dtype = var_3807_shape_cast_fp16_to_uint16_dtype_0, x = var_3807_shape_cast_fp16)[name = string("cast_356")]; + uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3807_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; + string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_355")]; + int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")]; + tensor expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([0])]; + tensor expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor([0])]; + tensor expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")]; + tensor concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor([17])]; + int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; + bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; + tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")]; + tensor concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor([0])]; + tensor concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor([0])]; + tensor concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor([0])]; + int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; + bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; + tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")]; + tensor k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_96)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = k_cache1)[name = string("coreml_update_state_98")]; + tensor v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_97)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = v_cache1)[name = string("coreml_update_state_99")]; + int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)]; + int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1280)]; + int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; + bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; + tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")]; + tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_384, end_mask = var_3823_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3823_cast_fp16")]; + tensor var_3826_begin_0 = const()[name = string("op_3826_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3826_end_mask_0 = const()[name = string("op_3826_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = concat_384, end_mask = var_3826_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3826_cast_fp16")]; + tensor concat_386x = const()[name = string("concat_386x"), val = tensor([1, -1, 20, 64])]; + tensor var_3836_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3836_cast_fp16")]; + tensor const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_139_cast_fp16 = mul(x = var_3836_cast_fp16, y = const_228_to_fp16)[name = string("q_139_cast_fp16")]; + tensor concat_387x = const()[name = string("concat_387x"), val = tensor([1, -1, 20, 64])]; + tensor var_3843_cast_fp16 = reshape(shape = concat_387x, x = var_3823_cast_fp16)[name = string("op_3843_cast_fp16")]; + tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_175_cast_fp16 = mul(x = var_3843_cast_fp16, y = const_229_to_fp16)[name = string("k_175_cast_fp16")]; + tensor concat_388x = const()[name = string("concat_388x"), val = tensor([1, -1, 20, 64])]; + tensor var_3850_cast_fp16 = reshape(shape = concat_388x, x = var_3826_cast_fp16)[name = string("op_3850_cast_fp16")]; + tensor var_3851 = const()[name = string("op_3851"), val = tensor([0, 2, 1, 3])]; + bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)]; + bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)]; + tensor transpose_325_perm_0 = const()[name = string("transpose_325_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_326_perm_0 = const()[name = string("transpose_326_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_326 = transpose(perm = transpose_326_perm_0, x = k_175_cast_fp16)[name = string("transpose_502")]; + tensor transpose_325 = transpose(perm = transpose_325_perm_0, x = q_139_cast_fp16)[name = string("transpose_503")]; + tensor qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_325, y = transpose_326)[name = string("qk_103_cast_fp16")]; + int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)]; + int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)]; + bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)]; + tensor concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")]; + tensor var_3854_begin_0 = const()[name = string("op_3854_begin_0"), val = tensor([0, 0])]; + tensor var_3854_end_mask_0 = const()[name = string("op_3854_end_mask_0"), val = tensor([false, true])]; + tensor var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = concat_389, end_mask = var_3854_end_mask_0, x = mask_to_fp16)[name = string("op_3854_cast_fp16")]; + int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)]; + int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)]; + bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)]; + tensor concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")]; + tensor var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor([0, 0])]; + tensor var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor([true, false])]; + tensor var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_390, end_mask = var_3855_end_mask_0, x = var_3854_cast_fp16)[name = string("op_3855_cast_fp16")]; + tensor qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3855_cast_fp16)[name = string("qk_105_cast_fp16")]; + tensor var_3858_cast_fp16 = softmax(axis = var_3767, x = qk_105_cast_fp16)[name = string("op_3858_cast_fp16")]; + bool var_3860_transpose_x_0 = const()[name = string("op_3860_transpose_x_0"), val = bool(false)]; + bool var_3860_transpose_y_0 = const()[name = string("op_3860_transpose_y_0"), val = bool(false)]; + tensor v_175_cast_fp16 = transpose(perm = var_3851, x = var_3850_cast_fp16)[name = string("transpose_504")]; + tensor var_3860_cast_fp16 = matmul(transpose_x = var_3860_transpose_x_0, transpose_y = var_3860_transpose_y_0, x = var_3858_cast_fp16, y = v_175_cast_fp16)[name = string("op_3860_cast_fp16")]; + tensor var_3861 = const()[name = string("op_3861"), val = tensor([0, 2, 1, 3])]; + tensor concat_391x = const()[name = string("concat_391x"), val = tensor([1, -1, 1280])]; + tensor var_3862_cast_fp16 = transpose(perm = var_3861, x = var_3860_cast_fp16)[name = string("transpose_501")]; + tensor x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3862_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_3866_to_fp16 = const()[name = string("op_3866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(928606848)))]; + tensor var_3867_to_fp16 = const()[name = string("op_3867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931883712)))]; + tensor linear_139_cast_fp16 = linear(bias = var_3867_to_fp16, weight = var_3866_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")]; + tensor var_3874_axes_0 = const()[name = string("op_3874_axes_0"), val = tensor([-1])]; + tensor blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931886336)))]; + tensor blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931888960)))]; + tensor var_3874_cast_fp16 = layer_norm(axes = var_3874_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3874_cast_fp16")]; + tensor var_3883_to_fp16 = const()[name = string("op_3883_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(931891584)))]; + tensor var_3884_to_fp16 = const()[name = string("op_3884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935168448)))]; + tensor linear_140_cast_fp16 = linear(bias = var_3884_to_fp16, weight = var_3883_to_fp16, x = var_3874_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor concat_392 = const()[name = string("concat_392"), val = tensor([0, 0, 0])]; + tensor concat_393 = const()[name = string("concat_393"), val = tensor([0, 1500, 0])]; + tensor k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_394 = const()[name = string("concat_394"), val = tensor([0, 0, 0])]; + tensor concat_395 = const()[name = string("concat_395"), val = tensor([0, 1500, 0])]; + tensor v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, -1, 20, 64])]; + tensor var_3904_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3904_cast_fp16")]; + tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_143_cast_fp16 = mul(x = var_3904_cast_fp16, y = const_230_to_fp16)[name = string("q_143_cast_fp16")]; + tensor var_3910 = const()[name = string("op_3910"), val = tensor([1, 1500, 20, -1])]; + tensor var_3911_cast_fp16 = reshape(shape = var_3910, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3911_cast_fp16")]; + tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_179_cast_fp16 = mul(x = var_3911_cast_fp16, y = const_231_to_fp16)[name = string("k_179_cast_fp16")]; + tensor var_3917 = const()[name = string("op_3917"), val = tensor([1, 1500, 20, -1])]; + tensor var_3918_cast_fp16 = reshape(shape = var_3917, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3918_cast_fp16")]; + tensor var_3919 = const()[name = string("op_3919"), val = tensor([0, 2, 1, 3])]; + bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)]; + bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)]; + tensor transpose_327_perm_0 = const()[name = string("transpose_327_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_328_perm_0 = const()[name = string("transpose_328_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_328 = transpose(perm = transpose_328_perm_0, x = k_179_cast_fp16)[name = string("transpose_498")]; + tensor transpose_327 = transpose(perm = transpose_327_perm_0, x = q_143_cast_fp16)[name = string("transpose_499")]; + tensor qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_327, y = transpose_328)[name = string("qk_107_cast_fp16")]; + tensor var_3923_cast_fp16 = softmax(axis = var_3767, x = qk_107_cast_fp16)[name = string("op_3923_cast_fp16")]; + bool var_3925_transpose_x_0 = const()[name = string("op_3925_transpose_x_0"), val = bool(false)]; + bool var_3925_transpose_y_0 = const()[name = string("op_3925_transpose_y_0"), val = bool(false)]; + tensor v_179_cast_fp16 = transpose(perm = var_3919, x = var_3918_cast_fp16)[name = string("transpose_500")]; + tensor var_3925_cast_fp16 = matmul(transpose_x = var_3925_transpose_x_0, transpose_y = var_3925_transpose_y_0, x = var_3923_cast_fp16, y = v_179_cast_fp16)[name = string("op_3925_cast_fp16")]; + tensor var_3926 = const()[name = string("op_3926"), val = tensor([0, 2, 1, 3])]; + tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, -1, 1280])]; + tensor var_3927_cast_fp16 = transpose(perm = var_3926, x = var_3925_cast_fp16)[name = string("transpose_497")]; + tensor x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3927_cast_fp16)[name = string("x_319_cast_fp16")]; + tensor var_3931_to_fp16 = const()[name = string("op_3931_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(935171072)))]; + tensor var_3932_to_fp16 = const()[name = string("op_3932_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938447936)))]; + tensor linear_141_cast_fp16 = linear(bias = var_3932_to_fp16, weight = var_3931_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")]; + tensor var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938450560)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938453184)))]; + tensor var_3939_cast_fp16 = layer_norm(axes = var_3939_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3773_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3939_cast_fp16")]; + tensor var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(938455808)))]; + tensor var_3949_to_fp16 = const()[name = string("op_3949_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951563072)))]; + tensor linear_142_cast_fp16 = linear(bias = var_3949_to_fp16, weight = var_3948_to_fp16, x = var_3939_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")]; + tensor x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_3954_to_fp16 = const()[name = string("op_3954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(951573376)))]; + tensor var_3955_to_fp16 = const()[name = string("op_3955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964680640)))]; + tensor linear_143_cast_fp16 = linear(bias = var_3955_to_fp16, weight = var_3954_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")]; + tensor k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; + tensor k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_98)[name = string("k_cache_73_cast_fp16")]; + tensor v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor([19, 1, 448, 1280])]; + tensor v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_99)[name = string("v_cache_73_cast_fp16")]; + tensor k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; + tensor k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")]; + tensor v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor([19, 1, 1500, 1280])]; + tensor v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")]; + int32 var_3978 = const()[name = string("op_3978"), val = int32(-1)]; + tensor var_3996_axes_0 = const()[name = string("op_3996_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964683264)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964685888)))]; + fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3996_cast_fp16 = layer_norm(axes = var_3996_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3996_cast_fp16")]; + tensor var_4007_to_fp16 = const()[name = string("op_4007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(964688512)))]; + tensor var_4008_to_fp16 = const()[name = string("op_4008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967965376)))]; + tensor linear_144_cast_fp16 = linear(bias = var_4008_to_fp16, weight = var_4007_to_fp16, x = var_3996_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_4011_to_fp16 = const()[name = string("op_4011_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(967968000)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4011_to_fp16, x = var_3996_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor var_4015_to_fp16 = const()[name = string("op_4015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971244864)))]; + tensor var_4016_to_fp16 = const()[name = string("op_4016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974521728)))]; + tensor linear_146_cast_fp16 = linear(bias = var_4016_to_fp16, weight = var_4015_to_fp16, x = var_3996_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor var_4018_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_4018_shape_cast_fp16")]; + int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)]; + int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)]; + bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)]; + string var_4018_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4018_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)]; + tensor var_4018_shape_cast_fp16_to_uint16 = cast(dtype = var_4018_shape_cast_fp16_to_uint16_dtype_0, x = var_4018_shape_cast_fp16)[name = string("cast_354")]; + uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_4018_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")]; + string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_353")]; + int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; + tensor expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor([0])]; + tensor expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor([0])]; + tensor expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")]; + tensor concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor([18])]; + int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)]; + bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)]; + tensor concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")]; + tensor concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor([0])]; + tensor concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor([0])]; + tensor concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor([0])]; + int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)]; + bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)]; + tensor concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")]; + tensor k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_98)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = k_cache1)[name = string("coreml_update_state_100")]; + tensor v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_99)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = v_cache1)[name = string("coreml_update_state_101")]; + int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)]; + int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1280)]; + int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)]; + bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)]; + tensor concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")]; + tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_406, end_mask = var_4034_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4034_cast_fp16")]; + tensor var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_406, end_mask = var_4037_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4037_cast_fp16")]; + tensor concat_408x = const()[name = string("concat_408x"), val = tensor([1, -1, 20, 64])]; + tensor var_4047_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4047_cast_fp16")]; + tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_147_cast_fp16 = mul(x = var_4047_cast_fp16, y = const_232_to_fp16)[name = string("q_147_cast_fp16")]; + tensor concat_409x = const()[name = string("concat_409x"), val = tensor([1, -1, 20, 64])]; + tensor var_4054_cast_fp16 = reshape(shape = concat_409x, x = var_4034_cast_fp16)[name = string("op_4054_cast_fp16")]; + tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_185_cast_fp16 = mul(x = var_4054_cast_fp16, y = const_233_to_fp16)[name = string("k_185_cast_fp16")]; + tensor concat_410x = const()[name = string("concat_410x"), val = tensor([1, -1, 20, 64])]; + tensor var_4061_cast_fp16 = reshape(shape = concat_410x, x = var_4037_cast_fp16)[name = string("op_4061_cast_fp16")]; + tensor var_4062 = const()[name = string("op_4062"), val = tensor([0, 2, 1, 3])]; + bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)]; + bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)]; + tensor transpose_329_perm_0 = const()[name = string("transpose_329_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_330_perm_0 = const()[name = string("transpose_330_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_330 = transpose(perm = transpose_330_perm_0, x = k_185_cast_fp16)[name = string("transpose_494")]; + tensor transpose_329 = transpose(perm = transpose_329_perm_0, x = q_147_cast_fp16)[name = string("transpose_495")]; + tensor qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_329, y = transpose_330)[name = string("qk_109_cast_fp16")]; + int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)]; + int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)]; + bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)]; + tensor concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")]; + tensor var_4065_begin_0 = const()[name = string("op_4065_begin_0"), val = tensor([0, 0])]; + tensor var_4065_end_mask_0 = const()[name = string("op_4065_end_mask_0"), val = tensor([false, true])]; + tensor var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = concat_411, end_mask = var_4065_end_mask_0, x = mask_to_fp16)[name = string("op_4065_cast_fp16")]; + int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)]; + int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; + bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; + tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")]; + tensor var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor([0, 0])]; + tensor var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor([true, false])]; + tensor var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = concat_412, end_mask = var_4066_end_mask_0, x = var_4065_cast_fp16)[name = string("op_4066_cast_fp16")]; + tensor qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4066_cast_fp16)[name = string("qk_111_cast_fp16")]; + tensor var_4069_cast_fp16 = softmax(axis = var_3978, x = qk_111_cast_fp16)[name = string("op_4069_cast_fp16")]; + bool var_4071_transpose_x_0 = const()[name = string("op_4071_transpose_x_0"), val = bool(false)]; + bool var_4071_transpose_y_0 = const()[name = string("op_4071_transpose_y_0"), val = bool(false)]; + tensor v_185_cast_fp16 = transpose(perm = var_4062, x = var_4061_cast_fp16)[name = string("transpose_496")]; + tensor var_4071_cast_fp16 = matmul(transpose_x = var_4071_transpose_x_0, transpose_y = var_4071_transpose_y_0, x = var_4069_cast_fp16, y = v_185_cast_fp16)[name = string("op_4071_cast_fp16")]; + tensor var_4072 = const()[name = string("op_4072"), val = tensor([0, 2, 1, 3])]; + tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, -1, 1280])]; + tensor var_4073_cast_fp16 = transpose(perm = var_4072, x = var_4071_cast_fp16)[name = string("transpose_493")]; + tensor x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4073_cast_fp16)[name = string("x_331_cast_fp16")]; + tensor var_4077_to_fp16 = const()[name = string("op_4077_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(974524352)))]; + tensor var_4078_to_fp16 = const()[name = string("op_4078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977801216)))]; + tensor linear_147_cast_fp16 = linear(bias = var_4078_to_fp16, weight = var_4077_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")]; + tensor var_4085_axes_0 = const()[name = string("op_4085_axes_0"), val = tensor([-1])]; + tensor blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977803840)))]; + tensor blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977806464)))]; + tensor var_4085_cast_fp16 = layer_norm(axes = var_4085_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4085_cast_fp16")]; + tensor var_4094_to_fp16 = const()[name = string("op_4094_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(977809088)))]; + tensor var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981085952)))]; + tensor linear_148_cast_fp16 = linear(bias = var_4095_to_fp16, weight = var_4094_to_fp16, x = var_4085_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor concat_414 = const()[name = string("concat_414"), val = tensor([0, 0, 0])]; + tensor concat_415 = const()[name = string("concat_415"), val = tensor([0, 1500, 0])]; + tensor k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_416 = const()[name = string("concat_416"), val = tensor([0, 0, 0])]; + tensor concat_417 = const()[name = string("concat_417"), val = tensor([0, 1500, 0])]; + tensor v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 20, 64])]; + tensor var_4115_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4115_cast_fp16")]; + tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_151_cast_fp16 = mul(x = var_4115_cast_fp16, y = const_234_to_fp16)[name = string("q_151_cast_fp16")]; + tensor var_4121 = const()[name = string("op_4121"), val = tensor([1, 1500, 20, -1])]; + tensor var_4122_cast_fp16 = reshape(shape = var_4121, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4122_cast_fp16")]; + tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_189_cast_fp16 = mul(x = var_4122_cast_fp16, y = const_235_to_fp16)[name = string("k_189_cast_fp16")]; + tensor var_4128 = const()[name = string("op_4128"), val = tensor([1, 1500, 20, -1])]; + tensor var_4129_cast_fp16 = reshape(shape = var_4128, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4129_cast_fp16")]; + tensor var_4130 = const()[name = string("op_4130"), val = tensor([0, 2, 1, 3])]; + bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)]; + bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)]; + tensor transpose_331_perm_0 = const()[name = string("transpose_331_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_332_perm_0 = const()[name = string("transpose_332_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_332 = transpose(perm = transpose_332_perm_0, x = k_189_cast_fp16)[name = string("transpose_490")]; + tensor transpose_331 = transpose(perm = transpose_331_perm_0, x = q_151_cast_fp16)[name = string("transpose_491")]; + tensor qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_331, y = transpose_332)[name = string("qk_113_cast_fp16")]; + tensor var_4134_cast_fp16 = softmax(axis = var_3978, x = qk_113_cast_fp16)[name = string("op_4134_cast_fp16")]; + bool var_4136_transpose_x_0 = const()[name = string("op_4136_transpose_x_0"), val = bool(false)]; + bool var_4136_transpose_y_0 = const()[name = string("op_4136_transpose_y_0"), val = bool(false)]; + tensor v_189_cast_fp16 = transpose(perm = var_4130, x = var_4129_cast_fp16)[name = string("transpose_492")]; + tensor var_4136_cast_fp16 = matmul(transpose_x = var_4136_transpose_x_0, transpose_y = var_4136_transpose_y_0, x = var_4134_cast_fp16, y = v_189_cast_fp16)[name = string("op_4136_cast_fp16")]; + tensor var_4137 = const()[name = string("op_4137"), val = tensor([0, 2, 1, 3])]; + tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 1280])]; + tensor var_4138_cast_fp16 = transpose(perm = var_4137, x = var_4136_cast_fp16)[name = string("transpose_489")]; + tensor x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4138_cast_fp16)[name = string("x_337_cast_fp16")]; + tensor var_4142_to_fp16 = const()[name = string("op_4142_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(981088576)))]; + tensor var_4143_to_fp16 = const()[name = string("op_4143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984365440)))]; + tensor linear_149_cast_fp16 = linear(bias = var_4143_to_fp16, weight = var_4142_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")]; + tensor var_4150_axes_0 = const()[name = string("op_4150_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984368064)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984370688)))]; + tensor var_4150_cast_fp16 = layer_norm(axes = var_4150_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3984_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4150_cast_fp16")]; + tensor var_4159_to_fp16 = const()[name = string("op_4159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984373312)))]; + tensor var_4160_to_fp16 = const()[name = string("op_4160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997480576)))]; + tensor linear_150_cast_fp16 = linear(bias = var_4160_to_fp16, weight = var_4159_to_fp16, x = var_4150_cast_fp16)[name = string("linear_150_cast_fp16")]; + string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")]; + tensor x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")]; + tensor var_4165_to_fp16 = const()[name = string("op_4165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997490880)))]; + tensor var_4166_to_fp16 = const()[name = string("op_4166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010598144)))]; + tensor linear_151_cast_fp16 = linear(bias = var_4166_to_fp16, weight = var_4165_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")]; + tensor k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; + tensor k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_100)[name = string("k_cache_77_cast_fp16")]; + tensor v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor([20, 1, 448, 1280])]; + tensor v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_101)[name = string("v_cache_77_cast_fp16")]; + tensor k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; + tensor k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")]; + tensor v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor([20, 1, 1500, 1280])]; + tensor v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")]; + int32 var_4189 = const()[name = string("op_4189"), val = int32(-1)]; + tensor var_4207_axes_0 = const()[name = string("op_4207_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010600768)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010603392)))]; + fp16 var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4207_cast_fp16 = layer_norm(axes = var_4207_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4207_cast_fp16")]; + tensor var_4218_to_fp16 = const()[name = string("op_4218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1010606016)))]; + tensor var_4219_to_fp16 = const()[name = string("op_4219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013882880)))]; + tensor linear_152_cast_fp16 = linear(bias = var_4219_to_fp16, weight = var_4218_to_fp16, x = var_4207_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor var_4222_to_fp16 = const()[name = string("op_4222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1013885504)))]; + tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4222_to_fp16, x = var_4207_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor var_4226_to_fp16 = const()[name = string("op_4226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1017162368)))]; + tensor var_4227_to_fp16 = const()[name = string("op_4227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020439232)))]; + tensor linear_154_cast_fp16 = linear(bias = var_4227_to_fp16, weight = var_4226_to_fp16, x = var_4207_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor var_4229_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4229_shape_cast_fp16")]; + int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)]; + int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)]; + bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)]; + string var_4229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)]; + tensor var_4229_shape_cast_fp16_to_uint16 = cast(dtype = var_4229_shape_cast_fp16_to_uint16_dtype_0, x = var_4229_shape_cast_fp16)[name = string("cast_352")]; + uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4229_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")]; + string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_351")]; + int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([0])]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([0])]; + tensor expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor([0])]; + tensor expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")]; + tensor concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor([19])]; + int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)]; + bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)]; + tensor concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")]; + tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([0])]; + tensor concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor([0])]; + tensor concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor([0])]; + int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; + bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; + tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")]; + tensor k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_100)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = k_cache1)[name = string("coreml_update_state_102")]; + tensor v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_101)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = v_cache1)[name = string("coreml_update_state_103")]; + int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)]; + int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1280)]; + int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)]; + bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)]; + tensor concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")]; + tensor var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_428, end_mask = var_4245_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4245_cast_fp16")]; + tensor var_4248_begin_0 = const()[name = string("op_4248_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4248_end_mask_0 = const()[name = string("op_4248_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = concat_428, end_mask = var_4248_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4248_cast_fp16")]; + tensor concat_430x = const()[name = string("concat_430x"), val = tensor([1, -1, 20, 64])]; + tensor var_4258_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4258_cast_fp16")]; + tensor const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_155_cast_fp16 = mul(x = var_4258_cast_fp16, y = const_236_to_fp16)[name = string("q_155_cast_fp16")]; + tensor concat_431x = const()[name = string("concat_431x"), val = tensor([1, -1, 20, 64])]; + tensor var_4265_cast_fp16 = reshape(shape = concat_431x, x = var_4245_cast_fp16)[name = string("op_4265_cast_fp16")]; + tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_195_cast_fp16 = mul(x = var_4265_cast_fp16, y = const_237_to_fp16)[name = string("k_195_cast_fp16")]; + tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, -1, 20, 64])]; + tensor var_4272_cast_fp16 = reshape(shape = concat_432x, x = var_4248_cast_fp16)[name = string("op_4272_cast_fp16")]; + tensor var_4273 = const()[name = string("op_4273"), val = tensor([0, 2, 1, 3])]; + bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)]; + bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)]; + tensor transpose_333_perm_0 = const()[name = string("transpose_333_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_334_perm_0 = const()[name = string("transpose_334_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_334 = transpose(perm = transpose_334_perm_0, x = k_195_cast_fp16)[name = string("transpose_486")]; + tensor transpose_333 = transpose(perm = transpose_333_perm_0, x = q_155_cast_fp16)[name = string("transpose_487")]; + tensor qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_333, y = transpose_334)[name = string("qk_115_cast_fp16")]; + int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)]; + int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; + bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; + tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")]; + tensor var_4276_begin_0 = const()[name = string("op_4276_begin_0"), val = tensor([0, 0])]; + tensor var_4276_end_mask_0 = const()[name = string("op_4276_end_mask_0"), val = tensor([false, true])]; + tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = concat_433, end_mask = var_4276_end_mask_0, x = mask_to_fp16)[name = string("op_4276_cast_fp16")]; + int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)]; + int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; + bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; + tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")]; + tensor var_4277_begin_0 = const()[name = string("op_4277_begin_0"), val = tensor([0, 0])]; + tensor var_4277_end_mask_0 = const()[name = string("op_4277_end_mask_0"), val = tensor([true, false])]; + tensor var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = concat_434, end_mask = var_4277_end_mask_0, x = var_4276_cast_fp16)[name = string("op_4277_cast_fp16")]; + tensor qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4277_cast_fp16)[name = string("qk_117_cast_fp16")]; + tensor var_4280_cast_fp16 = softmax(axis = var_4189, x = qk_117_cast_fp16)[name = string("op_4280_cast_fp16")]; + bool var_4282_transpose_x_0 = const()[name = string("op_4282_transpose_x_0"), val = bool(false)]; + bool var_4282_transpose_y_0 = const()[name = string("op_4282_transpose_y_0"), val = bool(false)]; + tensor v_195_cast_fp16 = transpose(perm = var_4273, x = var_4272_cast_fp16)[name = string("transpose_488")]; + tensor var_4282_cast_fp16 = matmul(transpose_x = var_4282_transpose_x_0, transpose_y = var_4282_transpose_y_0, x = var_4280_cast_fp16, y = v_195_cast_fp16)[name = string("op_4282_cast_fp16")]; + tensor var_4283 = const()[name = string("op_4283"), val = tensor([0, 2, 1, 3])]; + tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, -1, 1280])]; + tensor var_4284_cast_fp16 = transpose(perm = var_4283, x = var_4282_cast_fp16)[name = string("transpose_485")]; + tensor x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4284_cast_fp16)[name = string("x_349_cast_fp16")]; + tensor var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1020441856)))]; + tensor var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023718720)))]; + tensor linear_155_cast_fp16 = linear(bias = var_4289_to_fp16, weight = var_4288_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")]; + tensor var_4296_axes_0 = const()[name = string("op_4296_axes_0"), val = tensor([-1])]; + tensor blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023721344)))]; + tensor blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023723968)))]; + tensor var_4296_cast_fp16 = layer_norm(axes = var_4296_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4296_cast_fp16")]; + tensor var_4305_to_fp16 = const()[name = string("op_4305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1023726592)))]; + tensor var_4306_to_fp16 = const()[name = string("op_4306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027003456)))]; + tensor linear_156_cast_fp16 = linear(bias = var_4306_to_fp16, weight = var_4305_to_fp16, x = var_4296_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_436 = const()[name = string("concat_436"), val = tensor([0, 0, 0])]; + tensor concat_437 = const()[name = string("concat_437"), val = tensor([0, 1500, 0])]; + tensor k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_438 = const()[name = string("concat_438"), val = tensor([0, 0, 0])]; + tensor concat_439 = const()[name = string("concat_439"), val = tensor([0, 1500, 0])]; + tensor v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 20, 64])]; + tensor var_4326_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4326_cast_fp16")]; + tensor const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_159_cast_fp16 = mul(x = var_4326_cast_fp16, y = const_238_to_fp16)[name = string("q_159_cast_fp16")]; + tensor var_4332 = const()[name = string("op_4332"), val = tensor([1, 1500, 20, -1])]; + tensor var_4333_cast_fp16 = reshape(shape = var_4332, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4333_cast_fp16")]; + tensor const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_199_cast_fp16 = mul(x = var_4333_cast_fp16, y = const_239_to_fp16)[name = string("k_199_cast_fp16")]; + tensor var_4339 = const()[name = string("op_4339"), val = tensor([1, 1500, 20, -1])]; + tensor var_4340_cast_fp16 = reshape(shape = var_4339, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4340_cast_fp16")]; + tensor var_4341 = const()[name = string("op_4341"), val = tensor([0, 2, 1, 3])]; + bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)]; + bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)]; + tensor transpose_335_perm_0 = const()[name = string("transpose_335_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_336_perm_0 = const()[name = string("transpose_336_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_336 = transpose(perm = transpose_336_perm_0, x = k_199_cast_fp16)[name = string("transpose_482")]; + tensor transpose_335 = transpose(perm = transpose_335_perm_0, x = q_159_cast_fp16)[name = string("transpose_483")]; + tensor qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_335, y = transpose_336)[name = string("qk_119_cast_fp16")]; + tensor var_4345_cast_fp16 = softmax(axis = var_4189, x = qk_119_cast_fp16)[name = string("op_4345_cast_fp16")]; + bool var_4347_transpose_x_0 = const()[name = string("op_4347_transpose_x_0"), val = bool(false)]; + bool var_4347_transpose_y_0 = const()[name = string("op_4347_transpose_y_0"), val = bool(false)]; + tensor v_199_cast_fp16 = transpose(perm = var_4341, x = var_4340_cast_fp16)[name = string("transpose_484")]; + tensor var_4347_cast_fp16 = matmul(transpose_x = var_4347_transpose_x_0, transpose_y = var_4347_transpose_y_0, x = var_4345_cast_fp16, y = v_199_cast_fp16)[name = string("op_4347_cast_fp16")]; + tensor var_4348 = const()[name = string("op_4348"), val = tensor([0, 2, 1, 3])]; + tensor concat_441x = const()[name = string("concat_441x"), val = tensor([1, -1, 1280])]; + tensor var_4349_cast_fp16 = transpose(perm = var_4348, x = var_4347_cast_fp16)[name = string("transpose_481")]; + tensor x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4349_cast_fp16)[name = string("x_355_cast_fp16")]; + tensor var_4353_to_fp16 = const()[name = string("op_4353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1027006080)))]; + tensor var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030282944)))]; + tensor linear_157_cast_fp16 = linear(bias = var_4354_to_fp16, weight = var_4353_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_4361_axes_0 = const()[name = string("op_4361_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030285568)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030288192)))]; + tensor var_4361_cast_fp16 = layer_norm(axes = var_4361_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4195_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4361_cast_fp16")]; + tensor var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1030290816)))]; + tensor var_4371_to_fp16 = const()[name = string("op_4371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043398080)))]; + tensor linear_158_cast_fp16 = linear(bias = var_4371_to_fp16, weight = var_4370_to_fp16, x = var_4361_cast_fp16)[name = string("linear_158_cast_fp16")]; + string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")]; + tensor x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_4376_to_fp16 = const()[name = string("op_4376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043408384)))]; + tensor var_4377_to_fp16 = const()[name = string("op_4377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056515648)))]; + tensor linear_159_cast_fp16 = linear(bias = var_4377_to_fp16, weight = var_4376_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")]; + tensor k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; + tensor k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_102)[name = string("k_cache_81_cast_fp16")]; + tensor v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor([21, 1, 448, 1280])]; + tensor v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_103)[name = string("v_cache_81_cast_fp16")]; + tensor k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; + tensor k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")]; + tensor v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor([21, 1, 1500, 1280])]; + tensor v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")]; + int32 var_4400 = const()[name = string("op_4400"), val = int32(-1)]; + tensor var_4418_axes_0 = const()[name = string("op_4418_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056518272)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056520896)))]; + fp16 var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4418_cast_fp16 = layer_norm(axes = var_4418_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4418_cast_fp16")]; + tensor var_4429_to_fp16 = const()[name = string("op_4429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1056523520)))]; + tensor var_4430_to_fp16 = const()[name = string("op_4430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059800384)))]; + tensor linear_160_cast_fp16 = linear(bias = var_4430_to_fp16, weight = var_4429_to_fp16, x = var_4418_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor var_4433_to_fp16 = const()[name = string("op_4433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059803008)))]; + tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4433_to_fp16, x = var_4418_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor var_4437_to_fp16 = const()[name = string("op_4437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1063079872)))]; + tensor var_4438_to_fp16 = const()[name = string("op_4438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066356736)))]; + tensor linear_162_cast_fp16 = linear(bias = var_4438_to_fp16, weight = var_4437_to_fp16, x = var_4418_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor var_4440_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4440_shape_cast_fp16")]; + int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)]; + int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)]; + bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)]; + string var_4440_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4440_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)]; + tensor var_4440_shape_cast_fp16_to_uint16 = cast(dtype = var_4440_shape_cast_fp16_to_uint16_dtype_0, x = var_4440_shape_cast_fp16)[name = string("cast_350")]; + uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4440_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")]; + string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_349")]; + int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")]; + tensor expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor([0])]; + tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([0])]; + tensor expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor([0])]; + tensor expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")]; + tensor concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor([20])]; + int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)]; + bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)]; + tensor concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")]; + tensor concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor([0])]; + tensor concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor([0])]; + tensor concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor([0])]; + int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)]; + bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)]; + tensor concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")]; + tensor k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_102)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = k_cache1)[name = string("coreml_update_state_104")]; + tensor v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_103)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = v_cache1)[name = string("coreml_update_state_105")]; + int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)]; + int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1280)]; + int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; + bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; + tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")]; + tensor var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_450, end_mask = var_4456_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4456_cast_fp16")]; + tensor var_4459_begin_0 = const()[name = string("op_4459_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4459_end_mask_0 = const()[name = string("op_4459_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4459_cast_fp16 = slice_by_index(begin = var_4459_begin_0, end = concat_450, end_mask = var_4459_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4459_cast_fp16")]; + tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, -1, 20, 64])]; + tensor var_4469_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4469_cast_fp16")]; + tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_163_cast_fp16 = mul(x = var_4469_cast_fp16, y = const_240_to_fp16)[name = string("q_163_cast_fp16")]; + tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, -1, 20, 64])]; + tensor var_4476_cast_fp16 = reshape(shape = concat_453x, x = var_4456_cast_fp16)[name = string("op_4476_cast_fp16")]; + tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_205_cast_fp16 = mul(x = var_4476_cast_fp16, y = const_241_to_fp16)[name = string("k_205_cast_fp16")]; + tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, -1, 20, 64])]; + tensor var_4483_cast_fp16 = reshape(shape = concat_454x, x = var_4459_cast_fp16)[name = string("op_4483_cast_fp16")]; + tensor var_4484 = const()[name = string("op_4484"), val = tensor([0, 2, 1, 3])]; + bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)]; + bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)]; + tensor transpose_337_perm_0 = const()[name = string("transpose_337_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_338_perm_0 = const()[name = string("transpose_338_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_338 = transpose(perm = transpose_338_perm_0, x = k_205_cast_fp16)[name = string("transpose_478")]; + tensor transpose_337 = transpose(perm = transpose_337_perm_0, x = q_163_cast_fp16)[name = string("transpose_479")]; + tensor qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_337, y = transpose_338)[name = string("qk_121_cast_fp16")]; + int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)]; + int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; + bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; + tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")]; + tensor var_4487_begin_0 = const()[name = string("op_4487_begin_0"), val = tensor([0, 0])]; + tensor var_4487_end_mask_0 = const()[name = string("op_4487_end_mask_0"), val = tensor([false, true])]; + tensor var_4487_cast_fp16 = slice_by_index(begin = var_4487_begin_0, end = concat_455, end_mask = var_4487_end_mask_0, x = mask_to_fp16)[name = string("op_4487_cast_fp16")]; + int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)]; + int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; + bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; + tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")]; + tensor var_4488_begin_0 = const()[name = string("op_4488_begin_0"), val = tensor([0, 0])]; + tensor var_4488_end_mask_0 = const()[name = string("op_4488_end_mask_0"), val = tensor([true, false])]; + tensor var_4488_cast_fp16 = slice_by_index(begin = var_4488_begin_0, end = concat_456, end_mask = var_4488_end_mask_0, x = var_4487_cast_fp16)[name = string("op_4488_cast_fp16")]; + tensor qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4488_cast_fp16)[name = string("qk_123_cast_fp16")]; + tensor var_4491_cast_fp16 = softmax(axis = var_4400, x = qk_123_cast_fp16)[name = string("op_4491_cast_fp16")]; + bool var_4493_transpose_x_0 = const()[name = string("op_4493_transpose_x_0"), val = bool(false)]; + bool var_4493_transpose_y_0 = const()[name = string("op_4493_transpose_y_0"), val = bool(false)]; + tensor v_205_cast_fp16 = transpose(perm = var_4484, x = var_4483_cast_fp16)[name = string("transpose_480")]; + tensor var_4493_cast_fp16 = matmul(transpose_x = var_4493_transpose_x_0, transpose_y = var_4493_transpose_y_0, x = var_4491_cast_fp16, y = v_205_cast_fp16)[name = string("op_4493_cast_fp16")]; + tensor var_4494 = const()[name = string("op_4494"), val = tensor([0, 2, 1, 3])]; + tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 1280])]; + tensor var_4495_cast_fp16 = transpose(perm = var_4494, x = var_4493_cast_fp16)[name = string("transpose_477")]; + tensor x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4495_cast_fp16)[name = string("x_367_cast_fp16")]; + tensor var_4499_to_fp16 = const()[name = string("op_4499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066359360)))]; + tensor var_4500_to_fp16 = const()[name = string("op_4500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069636224)))]; + tensor linear_163_cast_fp16 = linear(bias = var_4500_to_fp16, weight = var_4499_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")]; + tensor var_4507_axes_0 = const()[name = string("op_4507_axes_0"), val = tensor([-1])]; + tensor blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069638848)))]; + tensor blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069641472)))]; + tensor var_4507_cast_fp16 = layer_norm(axes = var_4507_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4507_cast_fp16")]; + tensor var_4516_to_fp16 = const()[name = string("op_4516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1069644096)))]; + tensor var_4517_to_fp16 = const()[name = string("op_4517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072920960)))]; + tensor linear_164_cast_fp16 = linear(bias = var_4517_to_fp16, weight = var_4516_to_fp16, x = var_4507_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor concat_458 = const()[name = string("concat_458"), val = tensor([0, 0, 0])]; + tensor concat_459 = const()[name = string("concat_459"), val = tensor([0, 1500, 0])]; + tensor k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_460 = const()[name = string("concat_460"), val = tensor([0, 0, 0])]; + tensor concat_461 = const()[name = string("concat_461"), val = tensor([0, 1500, 0])]; + tensor v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_462x = const()[name = string("concat_462x"), val = tensor([1, -1, 20, 64])]; + tensor var_4537_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4537_cast_fp16")]; + tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_167_cast_fp16 = mul(x = var_4537_cast_fp16, y = const_242_to_fp16)[name = string("q_167_cast_fp16")]; + tensor var_4543 = const()[name = string("op_4543"), val = tensor([1, 1500, 20, -1])]; + tensor var_4544_cast_fp16 = reshape(shape = var_4543, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4544_cast_fp16")]; + tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_209_cast_fp16 = mul(x = var_4544_cast_fp16, y = const_243_to_fp16)[name = string("k_209_cast_fp16")]; + tensor var_4550 = const()[name = string("op_4550"), val = tensor([1, 1500, 20, -1])]; + tensor var_4551_cast_fp16 = reshape(shape = var_4550, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4551_cast_fp16")]; + tensor var_4552 = const()[name = string("op_4552"), val = tensor([0, 2, 1, 3])]; + bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)]; + bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)]; + tensor transpose_339_perm_0 = const()[name = string("transpose_339_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_340_perm_0 = const()[name = string("transpose_340_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_340 = transpose(perm = transpose_340_perm_0, x = k_209_cast_fp16)[name = string("transpose_474")]; + tensor transpose_339 = transpose(perm = transpose_339_perm_0, x = q_167_cast_fp16)[name = string("transpose_475")]; + tensor qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_339, y = transpose_340)[name = string("qk_125_cast_fp16")]; + tensor var_4556_cast_fp16 = softmax(axis = var_4400, x = qk_125_cast_fp16)[name = string("op_4556_cast_fp16")]; + bool var_4558_transpose_x_0 = const()[name = string("op_4558_transpose_x_0"), val = bool(false)]; + bool var_4558_transpose_y_0 = const()[name = string("op_4558_transpose_y_0"), val = bool(false)]; + tensor v_209_cast_fp16 = transpose(perm = var_4552, x = var_4551_cast_fp16)[name = string("transpose_476")]; + tensor var_4558_cast_fp16 = matmul(transpose_x = var_4558_transpose_x_0, transpose_y = var_4558_transpose_y_0, x = var_4556_cast_fp16, y = v_209_cast_fp16)[name = string("op_4558_cast_fp16")]; + tensor var_4559 = const()[name = string("op_4559"), val = tensor([0, 2, 1, 3])]; + tensor concat_463x = const()[name = string("concat_463x"), val = tensor([1, -1, 1280])]; + tensor var_4560_cast_fp16 = transpose(perm = var_4559, x = var_4558_cast_fp16)[name = string("transpose_473")]; + tensor x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4560_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_4564_to_fp16 = const()[name = string("op_4564_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072923584)))]; + tensor var_4565_to_fp16 = const()[name = string("op_4565_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076200448)))]; + tensor linear_165_cast_fp16 = linear(bias = var_4565_to_fp16, weight = var_4564_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")]; + tensor var_4572_axes_0 = const()[name = string("op_4572_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076203072)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076205696)))]; + tensor var_4572_cast_fp16 = layer_norm(axes = var_4572_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4406_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4572_cast_fp16")]; + tensor var_4581_to_fp16 = const()[name = string("op_4581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1076208320)))]; + tensor var_4582_to_fp16 = const()[name = string("op_4582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089315584)))]; + tensor linear_166_cast_fp16 = linear(bias = var_4582_to_fp16, weight = var_4581_to_fp16, x = var_4572_cast_fp16)[name = string("linear_166_cast_fp16")]; + string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")]; + tensor x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")]; + tensor var_4587_to_fp16 = const()[name = string("op_4587_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1089325888)))]; + tensor var_4588_to_fp16 = const()[name = string("op_4588_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102433152)))]; + tensor linear_167_cast_fp16 = linear(bias = var_4588_to_fp16, weight = var_4587_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")]; + tensor k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; + tensor k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_104)[name = string("k_cache_85_cast_fp16")]; + tensor v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor([22, 1, 448, 1280])]; + tensor v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_105)[name = string("v_cache_85_cast_fp16")]; + tensor k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; + tensor k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")]; + tensor v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor([22, 1, 1500, 1280])]; + tensor v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")]; + int32 var_4611 = const()[name = string("op_4611"), val = int32(-1)]; + tensor var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102435776)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102438400)))]; + fp16 var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4629_cast_fp16 = layer_norm(axes = var_4629_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4629_cast_fp16")]; + tensor var_4640_to_fp16 = const()[name = string("op_4640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1102441024)))]; + tensor var_4641_to_fp16 = const()[name = string("op_4641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105717888)))]; + tensor linear_168_cast_fp16 = linear(bias = var_4641_to_fp16, weight = var_4640_to_fp16, x = var_4629_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor var_4644_to_fp16 = const()[name = string("op_4644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1105720512)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4644_to_fp16, x = var_4629_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor var_4648_to_fp16 = const()[name = string("op_4648_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1108997376)))]; + tensor var_4649_to_fp16 = const()[name = string("op_4649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112274240)))]; + tensor linear_170_cast_fp16 = linear(bias = var_4649_to_fp16, weight = var_4648_to_fp16, x = var_4629_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor var_4651_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4651_shape_cast_fp16")]; + int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)]; + int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)]; + bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)]; + string var_4651_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4651_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)]; + tensor var_4651_shape_cast_fp16_to_uint16 = cast(dtype = var_4651_shape_cast_fp16_to_uint16_dtype_0, x = var_4651_shape_cast_fp16)[name = string("cast_348")]; + uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4651_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")]; + string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_347")]; + int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")]; + tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; + tensor expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor([0])]; + tensor expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor([0])]; + tensor expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")]; + tensor concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor([21])]; + int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)]; + bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)]; + tensor concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")]; + tensor concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor([0])]; + tensor concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor([0])]; + tensor concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor([0])]; + int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)]; + bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)]; + tensor concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")]; + tensor k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_104)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = k_cache1)[name = string("coreml_update_state_106")]; + tensor v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_105)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = v_cache1)[name = string("coreml_update_state_107")]; + int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)]; + int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1280)]; + int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; + bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; + tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")]; + tensor var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_472, end_mask = var_4667_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4667_cast_fp16")]; + tensor var_4670_begin_0 = const()[name = string("op_4670_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4670_end_mask_0 = const()[name = string("op_4670_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = concat_472, end_mask = var_4670_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4670_cast_fp16")]; + tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 20, 64])]; + tensor var_4680_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4680_cast_fp16")]; + tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_171_cast_fp16 = mul(x = var_4680_cast_fp16, y = const_244_to_fp16)[name = string("q_171_cast_fp16")]; + tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 20, 64])]; + tensor var_4687_cast_fp16 = reshape(shape = concat_475x, x = var_4667_cast_fp16)[name = string("op_4687_cast_fp16")]; + tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_215_cast_fp16 = mul(x = var_4687_cast_fp16, y = const_245_to_fp16)[name = string("k_215_cast_fp16")]; + tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 20, 64])]; + tensor var_4694_cast_fp16 = reshape(shape = concat_476x, x = var_4670_cast_fp16)[name = string("op_4694_cast_fp16")]; + tensor var_4695 = const()[name = string("op_4695"), val = tensor([0, 2, 1, 3])]; + bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)]; + bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)]; + tensor transpose_341_perm_0 = const()[name = string("transpose_341_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_342_perm_0 = const()[name = string("transpose_342_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_342 = transpose(perm = transpose_342_perm_0, x = k_215_cast_fp16)[name = string("transpose_470")]; + tensor transpose_341 = transpose(perm = transpose_341_perm_0, x = q_171_cast_fp16)[name = string("transpose_471")]; + tensor qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_341, y = transpose_342)[name = string("qk_127_cast_fp16")]; + int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)]; + int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)]; + bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)]; + tensor concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")]; + tensor var_4698_begin_0 = const()[name = string("op_4698_begin_0"), val = tensor([0, 0])]; + tensor var_4698_end_mask_0 = const()[name = string("op_4698_end_mask_0"), val = tensor([false, true])]; + tensor var_4698_cast_fp16 = slice_by_index(begin = var_4698_begin_0, end = concat_477, end_mask = var_4698_end_mask_0, x = mask_to_fp16)[name = string("op_4698_cast_fp16")]; + int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)]; + int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)]; + bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)]; + tensor concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")]; + tensor var_4699_begin_0 = const()[name = string("op_4699_begin_0"), val = tensor([0, 0])]; + tensor var_4699_end_mask_0 = const()[name = string("op_4699_end_mask_0"), val = tensor([true, false])]; + tensor var_4699_cast_fp16 = slice_by_index(begin = var_4699_begin_0, end = concat_478, end_mask = var_4699_end_mask_0, x = var_4698_cast_fp16)[name = string("op_4699_cast_fp16")]; + tensor qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4699_cast_fp16)[name = string("qk_129_cast_fp16")]; + tensor var_4702_cast_fp16 = softmax(axis = var_4611, x = qk_129_cast_fp16)[name = string("op_4702_cast_fp16")]; + bool var_4704_transpose_x_0 = const()[name = string("op_4704_transpose_x_0"), val = bool(false)]; + bool var_4704_transpose_y_0 = const()[name = string("op_4704_transpose_y_0"), val = bool(false)]; + tensor v_215_cast_fp16 = transpose(perm = var_4695, x = var_4694_cast_fp16)[name = string("transpose_472")]; + tensor var_4704_cast_fp16 = matmul(transpose_x = var_4704_transpose_x_0, transpose_y = var_4704_transpose_y_0, x = var_4702_cast_fp16, y = v_215_cast_fp16)[name = string("op_4704_cast_fp16")]; + tensor var_4705 = const()[name = string("op_4705"), val = tensor([0, 2, 1, 3])]; + tensor concat_479x = const()[name = string("concat_479x"), val = tensor([1, -1, 1280])]; + tensor var_4706_cast_fp16 = transpose(perm = var_4705, x = var_4704_cast_fp16)[name = string("transpose_469")]; + tensor x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4706_cast_fp16)[name = string("x_385_cast_fp16")]; + tensor var_4710_to_fp16 = const()[name = string("op_4710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1112276864)))]; + tensor var_4711_to_fp16 = const()[name = string("op_4711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115553728)))]; + tensor linear_171_cast_fp16 = linear(bias = var_4711_to_fp16, weight = var_4710_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")]; + tensor var_4718_axes_0 = const()[name = string("op_4718_axes_0"), val = tensor([-1])]; + tensor blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115556352)))]; + tensor blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115558976)))]; + tensor var_4718_cast_fp16 = layer_norm(axes = var_4718_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4718_cast_fp16")]; + tensor var_4727_to_fp16 = const()[name = string("op_4727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1115561600)))]; + tensor var_4728_to_fp16 = const()[name = string("op_4728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118838464)))]; + tensor linear_172_cast_fp16 = linear(bias = var_4728_to_fp16, weight = var_4727_to_fp16, x = var_4718_cast_fp16)[name = string("linear_172_cast_fp16")]; + tensor concat_480 = const()[name = string("concat_480"), val = tensor([0, 0, 0])]; + tensor concat_481 = const()[name = string("concat_481"), val = tensor([0, 1500, 0])]; + tensor k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_482 = const()[name = string("concat_482"), val = tensor([0, 0, 0])]; + tensor concat_483 = const()[name = string("concat_483"), val = tensor([0, 1500, 0])]; + tensor v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_484x = const()[name = string("concat_484x"), val = tensor([1, -1, 20, 64])]; + tensor var_4748_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4748_cast_fp16")]; + tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_175_cast_fp16 = mul(x = var_4748_cast_fp16, y = const_246_to_fp16)[name = string("q_175_cast_fp16")]; + tensor var_4754 = const()[name = string("op_4754"), val = tensor([1, 1500, 20, -1])]; + tensor var_4755_cast_fp16 = reshape(shape = var_4754, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4755_cast_fp16")]; + tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_219_cast_fp16 = mul(x = var_4755_cast_fp16, y = const_247_to_fp16)[name = string("k_219_cast_fp16")]; + tensor var_4761 = const()[name = string("op_4761"), val = tensor([1, 1500, 20, -1])]; + tensor var_4762_cast_fp16 = reshape(shape = var_4761, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4762_cast_fp16")]; + tensor var_4763 = const()[name = string("op_4763"), val = tensor([0, 2, 1, 3])]; + bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)]; + bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)]; + tensor transpose_343_perm_0 = const()[name = string("transpose_343_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_344_perm_0 = const()[name = string("transpose_344_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_344 = transpose(perm = transpose_344_perm_0, x = k_219_cast_fp16)[name = string("transpose_466")]; + tensor transpose_343 = transpose(perm = transpose_343_perm_0, x = q_175_cast_fp16)[name = string("transpose_467")]; + tensor qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_343, y = transpose_344)[name = string("qk_131_cast_fp16")]; + tensor var_4767_cast_fp16 = softmax(axis = var_4611, x = qk_131_cast_fp16)[name = string("op_4767_cast_fp16")]; + bool var_4769_transpose_x_0 = const()[name = string("op_4769_transpose_x_0"), val = bool(false)]; + bool var_4769_transpose_y_0 = const()[name = string("op_4769_transpose_y_0"), val = bool(false)]; + tensor v_219_cast_fp16 = transpose(perm = var_4763, x = var_4762_cast_fp16)[name = string("transpose_468")]; + tensor var_4769_cast_fp16 = matmul(transpose_x = var_4769_transpose_x_0, transpose_y = var_4769_transpose_y_0, x = var_4767_cast_fp16, y = v_219_cast_fp16)[name = string("op_4769_cast_fp16")]; + tensor var_4770 = const()[name = string("op_4770"), val = tensor([0, 2, 1, 3])]; + tensor concat_485x = const()[name = string("concat_485x"), val = tensor([1, -1, 1280])]; + tensor var_4771_cast_fp16 = transpose(perm = var_4770, x = var_4769_cast_fp16)[name = string("transpose_465")]; + tensor x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4771_cast_fp16)[name = string("x_391_cast_fp16")]; + tensor var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1118841088)))]; + tensor var_4776_to_fp16 = const()[name = string("op_4776_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122117952)))]; + tensor linear_173_cast_fp16 = linear(bias = var_4776_to_fp16, weight = var_4775_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")]; + tensor var_4783_axes_0 = const()[name = string("op_4783_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122120576)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122123200)))]; + tensor var_4783_cast_fp16 = layer_norm(axes = var_4783_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4617_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4783_cast_fp16")]; + tensor var_4792_to_fp16 = const()[name = string("op_4792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122125824)))]; + tensor var_4793_to_fp16 = const()[name = string("op_4793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135233088)))]; + tensor linear_174_cast_fp16 = linear(bias = var_4793_to_fp16, weight = var_4792_to_fp16, x = var_4783_cast_fp16)[name = string("linear_174_cast_fp16")]; + string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")]; + tensor x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")]; + tensor var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135243392)))]; + tensor var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148350656)))]; + tensor linear_175_cast_fp16 = linear(bias = var_4799_to_fp16, weight = var_4798_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")]; + tensor k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; + tensor k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_106)[name = string("k_cache_89_cast_fp16")]; + tensor v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor([23, 1, 448, 1280])]; + tensor v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_107)[name = string("v_cache_89_cast_fp16")]; + tensor k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; + tensor k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")]; + tensor v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor([23, 1, 1500, 1280])]; + tensor v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")]; + int32 var_4822 = const()[name = string("op_4822"), val = int32(-1)]; + tensor var_4840_axes_0 = const()[name = string("op_4840_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148353280)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148355904)))]; + fp16 var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4840_cast_fp16 = layer_norm(axes = var_4840_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4840_cast_fp16")]; + tensor var_4851_to_fp16 = const()[name = string("op_4851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148358528)))]; + tensor var_4852_to_fp16 = const()[name = string("op_4852_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151635392)))]; + tensor linear_176_cast_fp16 = linear(bias = var_4852_to_fp16, weight = var_4851_to_fp16, x = var_4840_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor var_4855_to_fp16 = const()[name = string("op_4855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1151638016)))]; + tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4855_to_fp16, x = var_4840_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor var_4859_to_fp16 = const()[name = string("op_4859_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154914880)))]; + tensor var_4860_to_fp16 = const()[name = string("op_4860_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158191744)))]; + tensor linear_178_cast_fp16 = linear(bias = var_4860_to_fp16, weight = var_4859_to_fp16, x = var_4840_cast_fp16)[name = string("linear_178_cast_fp16")]; + tensor var_4862_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4862_shape_cast_fp16")]; + int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; + int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; + bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; + string var_4862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)]; + tensor var_4862_shape_cast_fp16_to_uint16 = cast(dtype = var_4862_shape_cast_fp16_to_uint16_dtype_0, x = var_4862_shape_cast_fp16)[name = string("cast_346")]; + uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4862_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; + string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_345")]; + int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")]; + tensor expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor([0])]; + tensor expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor([0])]; + tensor expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor([0])]; + tensor expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")]; + tensor concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor([22])]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")]; + tensor concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor([0])]; + tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; + tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; + int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; + bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; + tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")]; + tensor k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_106)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = k_cache1)[name = string("coreml_update_state_108")]; + tensor v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_107)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = v_cache1)[name = string("coreml_update_state_109")]; + int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)]; + int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1280)]; + int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; + bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; + tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")]; + tensor var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_494, end_mask = var_4878_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4878_cast_fp16")]; + tensor var_4881_begin_0 = const()[name = string("op_4881_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4881_end_mask_0 = const()[name = string("op_4881_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4881_cast_fp16 = slice_by_index(begin = var_4881_begin_0, end = concat_494, end_mask = var_4881_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4881_cast_fp16")]; + tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 20, 64])]; + tensor var_4891_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4891_cast_fp16")]; + tensor const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_179_cast_fp16 = mul(x = var_4891_cast_fp16, y = const_248_to_fp16)[name = string("q_179_cast_fp16")]; + tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 20, 64])]; + tensor var_4898_cast_fp16 = reshape(shape = concat_497x, x = var_4878_cast_fp16)[name = string("op_4898_cast_fp16")]; + tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_225_cast_fp16 = mul(x = var_4898_cast_fp16, y = const_249_to_fp16)[name = string("k_225_cast_fp16")]; + tensor concat_498x = const()[name = string("concat_498x"), val = tensor([1, -1, 20, 64])]; + tensor var_4905_cast_fp16 = reshape(shape = concat_498x, x = var_4881_cast_fp16)[name = string("op_4905_cast_fp16")]; + tensor var_4906 = const()[name = string("op_4906"), val = tensor([0, 2, 1, 3])]; + bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)]; + bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)]; + tensor transpose_345_perm_0 = const()[name = string("transpose_345_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_346_perm_0 = const()[name = string("transpose_346_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_346 = transpose(perm = transpose_346_perm_0, x = k_225_cast_fp16)[name = string("transpose_462")]; + tensor transpose_345 = transpose(perm = transpose_345_perm_0, x = q_179_cast_fp16)[name = string("transpose_463")]; + tensor qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_345, y = transpose_346)[name = string("qk_133_cast_fp16")]; + int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)]; + int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; + bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; + tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")]; + tensor var_4909_begin_0 = const()[name = string("op_4909_begin_0"), val = tensor([0, 0])]; + tensor var_4909_end_mask_0 = const()[name = string("op_4909_end_mask_0"), val = tensor([false, true])]; + tensor var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = concat_499, end_mask = var_4909_end_mask_0, x = mask_to_fp16)[name = string("op_4909_cast_fp16")]; + int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)]; + int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; + bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; + tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")]; + tensor var_4910_begin_0 = const()[name = string("op_4910_begin_0"), val = tensor([0, 0])]; + tensor var_4910_end_mask_0 = const()[name = string("op_4910_end_mask_0"), val = tensor([true, false])]; + tensor var_4910_cast_fp16 = slice_by_index(begin = var_4910_begin_0, end = concat_500, end_mask = var_4910_end_mask_0, x = var_4909_cast_fp16)[name = string("op_4910_cast_fp16")]; + tensor qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4910_cast_fp16)[name = string("qk_135_cast_fp16")]; + tensor var_4913_cast_fp16 = softmax(axis = var_4822, x = qk_135_cast_fp16)[name = string("op_4913_cast_fp16")]; + bool var_4915_transpose_x_0 = const()[name = string("op_4915_transpose_x_0"), val = bool(false)]; + bool var_4915_transpose_y_0 = const()[name = string("op_4915_transpose_y_0"), val = bool(false)]; + tensor v_225_cast_fp16 = transpose(perm = var_4906, x = var_4905_cast_fp16)[name = string("transpose_464")]; + tensor var_4915_cast_fp16 = matmul(transpose_x = var_4915_transpose_x_0, transpose_y = var_4915_transpose_y_0, x = var_4913_cast_fp16, y = v_225_cast_fp16)[name = string("op_4915_cast_fp16")]; + tensor var_4916 = const()[name = string("op_4916"), val = tensor([0, 2, 1, 3])]; + tensor concat_501x = const()[name = string("concat_501x"), val = tensor([1, -1, 1280])]; + tensor var_4917_cast_fp16 = transpose(perm = var_4916, x = var_4915_cast_fp16)[name = string("transpose_461")]; + tensor x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4917_cast_fp16)[name = string("x_403_cast_fp16")]; + tensor var_4921_to_fp16 = const()[name = string("op_4921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1158194368)))]; + tensor var_4922_to_fp16 = const()[name = string("op_4922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161471232)))]; + tensor linear_179_cast_fp16 = linear(bias = var_4922_to_fp16, weight = var_4921_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_4929_axes_0 = const()[name = string("op_4929_axes_0"), val = tensor([-1])]; + tensor blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161473856)))]; + tensor blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161476480)))]; + tensor var_4929_cast_fp16 = layer_norm(axes = var_4929_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4929_cast_fp16")]; + tensor var_4938_to_fp16 = const()[name = string("op_4938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1161479104)))]; + tensor var_4939_to_fp16 = const()[name = string("op_4939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164755968)))]; + tensor linear_180_cast_fp16 = linear(bias = var_4939_to_fp16, weight = var_4938_to_fp16, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor concat_502 = const()[name = string("concat_502"), val = tensor([0, 0, 0])]; + tensor concat_503 = const()[name = string("concat_503"), val = tensor([0, 1500, 0])]; + tensor k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_504 = const()[name = string("concat_504"), val = tensor([0, 0, 0])]; + tensor concat_505 = const()[name = string("concat_505"), val = tensor([0, 1500, 0])]; + tensor v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_506x = const()[name = string("concat_506x"), val = tensor([1, -1, 20, 64])]; + tensor var_4959_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4959_cast_fp16")]; + tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_183_cast_fp16 = mul(x = var_4959_cast_fp16, y = const_250_to_fp16)[name = string("q_183_cast_fp16")]; + tensor var_4965 = const()[name = string("op_4965"), val = tensor([1, 1500, 20, -1])]; + tensor var_4966_cast_fp16 = reshape(shape = var_4965, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4966_cast_fp16")]; + tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_229_cast_fp16 = mul(x = var_4966_cast_fp16, y = const_251_to_fp16)[name = string("k_229_cast_fp16")]; + tensor var_4972 = const()[name = string("op_4972"), val = tensor([1, 1500, 20, -1])]; + tensor var_4973_cast_fp16 = reshape(shape = var_4972, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4973_cast_fp16")]; + tensor var_4974 = const()[name = string("op_4974"), val = tensor([0, 2, 1, 3])]; + bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)]; + bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)]; + tensor transpose_347_perm_0 = const()[name = string("transpose_347_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_348_perm_0 = const()[name = string("transpose_348_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_348 = transpose(perm = transpose_348_perm_0, x = k_229_cast_fp16)[name = string("transpose_458")]; + tensor transpose_347 = transpose(perm = transpose_347_perm_0, x = q_183_cast_fp16)[name = string("transpose_459")]; + tensor qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_347, y = transpose_348)[name = string("qk_137_cast_fp16")]; + tensor var_4978_cast_fp16 = softmax(axis = var_4822, x = qk_137_cast_fp16)[name = string("op_4978_cast_fp16")]; + bool var_4980_transpose_x_0 = const()[name = string("op_4980_transpose_x_0"), val = bool(false)]; + bool var_4980_transpose_y_0 = const()[name = string("op_4980_transpose_y_0"), val = bool(false)]; + tensor v_229_cast_fp16 = transpose(perm = var_4974, x = var_4973_cast_fp16)[name = string("transpose_460")]; + tensor var_4980_cast_fp16 = matmul(transpose_x = var_4980_transpose_x_0, transpose_y = var_4980_transpose_y_0, x = var_4978_cast_fp16, y = v_229_cast_fp16)[name = string("op_4980_cast_fp16")]; + tensor var_4981 = const()[name = string("op_4981"), val = tensor([0, 2, 1, 3])]; + tensor concat_507x = const()[name = string("concat_507x"), val = tensor([1, -1, 1280])]; + tensor var_4982_cast_fp16 = transpose(perm = var_4981, x = var_4980_cast_fp16)[name = string("transpose_457")]; + tensor x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4982_cast_fp16)[name = string("x_409_cast_fp16")]; + tensor var_4986_to_fp16 = const()[name = string("op_4986_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164758592)))]; + tensor var_4987_to_fp16 = const()[name = string("op_4987_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168035456)))]; + tensor linear_181_cast_fp16 = linear(bias = var_4987_to_fp16, weight = var_4986_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")]; + tensor var_4994_axes_0 = const()[name = string("op_4994_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168038080)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168040704)))]; + tensor var_4994_cast_fp16 = layer_norm(axes = var_4994_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4828_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4994_cast_fp16")]; + tensor var_5003_to_fp16 = const()[name = string("op_5003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1168043328)))]; + tensor var_5004_to_fp16 = const()[name = string("op_5004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181150592)))]; + tensor linear_182_cast_fp16 = linear(bias = var_5004_to_fp16, weight = var_5003_to_fp16, x = var_4994_cast_fp16)[name = string("linear_182_cast_fp16")]; + string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")]; + tensor x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")]; + tensor var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1181160896)))]; + tensor var_5010_to_fp16 = const()[name = string("op_5010_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194268160)))]; + tensor linear_183_cast_fp16 = linear(bias = var_5010_to_fp16, weight = var_5009_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")]; + tensor k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; + tensor k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_108)[name = string("k_cache_93_cast_fp16")]; + tensor v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor([24, 1, 448, 1280])]; + tensor v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_109)[name = string("v_cache_93_cast_fp16")]; + tensor k_cache_95_begin_0 = const()[name = string("k_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_95_end_0 = const()[name = string("k_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; + tensor k_cache_95_end_mask_0 = const()[name = string("k_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_95_squeeze_mask_0 = const()[name = string("k_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_95_cast_fp16 = slice_by_index(begin = k_cache_95_begin_0, end = k_cache_95_end_0, end_mask = k_cache_95_end_mask_0, squeeze_mask = k_cache_95_squeeze_mask_0, x = read_state_2)[name = string("k_cache_95_cast_fp16")]; + tensor v_cache_95_begin_0 = const()[name = string("v_cache_95_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_95_end_0 = const()[name = string("v_cache_95_end_0"), val = tensor([24, 1, 1500, 1280])]; + tensor v_cache_95_end_mask_0 = const()[name = string("v_cache_95_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_95_squeeze_mask_0 = const()[name = string("v_cache_95_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_95_cast_fp16 = slice_by_index(begin = v_cache_95_begin_0, end = v_cache_95_end_0, end_mask = v_cache_95_end_mask_0, squeeze_mask = v_cache_95_squeeze_mask_0, x = read_state_3)[name = string("v_cache_95_cast_fp16")]; + int32 var_5033 = const()[name = string("op_5033"), val = int32(-1)]; + tensor var_5051_axes_0 = const()[name = string("op_5051_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194270784)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194273408)))]; + fp16 var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5051_cast_fp16 = layer_norm(axes = var_5051_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5051_cast_fp16")]; + tensor var_5062_to_fp16 = const()[name = string("op_5062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1194276032)))]; + tensor var_5063_to_fp16 = const()[name = string("op_5063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197552896)))]; + tensor linear_184_cast_fp16 = linear(bias = var_5063_to_fp16, weight = var_5062_to_fp16, x = var_5051_cast_fp16)[name = string("linear_184_cast_fp16")]; + tensor var_5066_to_fp16 = const()[name = string("op_5066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1197555520)))]; + tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5066_to_fp16, x = var_5051_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor var_5070_to_fp16 = const()[name = string("op_5070_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1200832384)))]; + tensor var_5071_to_fp16 = const()[name = string("op_5071_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204109248)))]; + tensor linear_186_cast_fp16 = linear(bias = var_5071_to_fp16, weight = var_5070_to_fp16, x = var_5051_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_5073_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5073_shape_cast_fp16")]; + int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)]; + int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)]; + bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)]; + string var_5073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)]; + tensor var_5073_shape_cast_fp16_to_uint16 = cast(dtype = var_5073_shape_cast_fp16_to_uint16_dtype_0, x = var_5073_shape_cast_fp16)[name = string("cast_344")]; + uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5073_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")]; + string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_343")]; + int32 end_step_49 = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step_49")]; + tensor expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor([0])]; + tensor expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor([0])]; + tensor expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor([0])]; + tensor expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step_49)[name = string("expand_dims_371")]; + tensor concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor([23])]; + int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; + bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; + tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")]; + tensor concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor([0])]; + tensor concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor([0])]; + tensor concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor([0])]; + int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; + bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; + tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")]; + tensor k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_108)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = k_cache1)[name = string("coreml_update_state_110")]; + tensor v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_109)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = v_cache1)[name = string("coreml_update_state_111")]; + int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)]; + int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1280)]; + int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)]; + bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)]; + tensor concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step_49, concat_516_values2_0))[name = string("concat_516")]; + tensor var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_516, end_mask = var_5089_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5089_cast_fp16")]; + tensor var_5092_begin_0 = const()[name = string("op_5092_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5092_end_mask_0 = const()[name = string("op_5092_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5092_cast_fp16 = slice_by_index(begin = var_5092_begin_0, end = concat_516, end_mask = var_5092_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5092_cast_fp16")]; + tensor concat_518x = const()[name = string("concat_518x"), val = tensor([1, -1, 20, 64])]; + tensor var_5102_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5102_cast_fp16")]; + tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_187_cast_fp16 = mul(x = var_5102_cast_fp16, y = const_252_to_fp16)[name = string("q_187_cast_fp16")]; + tensor concat_519x = const()[name = string("concat_519x"), val = tensor([1, -1, 20, 64])]; + tensor var_5109_cast_fp16 = reshape(shape = concat_519x, x = var_5089_cast_fp16)[name = string("op_5109_cast_fp16")]; + tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_235_cast_fp16 = mul(x = var_5109_cast_fp16, y = const_253_to_fp16)[name = string("k_235_cast_fp16")]; + tensor concat_520x = const()[name = string("concat_520x"), val = tensor([1, -1, 20, 64])]; + tensor var_5116_cast_fp16 = reshape(shape = concat_520x, x = var_5092_cast_fp16)[name = string("op_5116_cast_fp16")]; + tensor var_5117 = const()[name = string("op_5117"), val = tensor([0, 2, 1, 3])]; + bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)]; + bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)]; + tensor transpose_349_perm_0 = const()[name = string("transpose_349_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_350_perm_0 = const()[name = string("transpose_350_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_350 = transpose(perm = transpose_350_perm_0, x = k_235_cast_fp16)[name = string("transpose_454")]; + tensor transpose_349 = transpose(perm = transpose_349_perm_0, x = q_187_cast_fp16)[name = string("transpose_455")]; + tensor qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_349, y = transpose_350)[name = string("qk_139_cast_fp16")]; + int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)]; + int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)]; + bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)]; + tensor concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")]; + tensor var_5120_begin_0 = const()[name = string("op_5120_begin_0"), val = tensor([0, 0])]; + tensor var_5120_end_mask_0 = const()[name = string("op_5120_end_mask_0"), val = tensor([false, true])]; + tensor var_5120_cast_fp16 = slice_by_index(begin = var_5120_begin_0, end = concat_521, end_mask = var_5120_end_mask_0, x = mask_to_fp16)[name = string("op_5120_cast_fp16")]; + int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)]; + int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)]; + bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)]; + tensor concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")]; + tensor var_5121_begin_0 = const()[name = string("op_5121_begin_0"), val = tensor([0, 0])]; + tensor var_5121_end_mask_0 = const()[name = string("op_5121_end_mask_0"), val = tensor([true, false])]; + tensor var_5121_cast_fp16 = slice_by_index(begin = var_5121_begin_0, end = concat_522, end_mask = var_5121_end_mask_0, x = var_5120_cast_fp16)[name = string("op_5121_cast_fp16")]; + tensor qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5121_cast_fp16)[name = string("qk_141_cast_fp16")]; + tensor var_5124_cast_fp16 = softmax(axis = var_5033, x = qk_141_cast_fp16)[name = string("op_5124_cast_fp16")]; + bool var_5126_transpose_x_0 = const()[name = string("op_5126_transpose_x_0"), val = bool(false)]; + bool var_5126_transpose_y_0 = const()[name = string("op_5126_transpose_y_0"), val = bool(false)]; + tensor v_235_cast_fp16 = transpose(perm = var_5117, x = var_5116_cast_fp16)[name = string("transpose_456")]; + tensor var_5126_cast_fp16 = matmul(transpose_x = var_5126_transpose_x_0, transpose_y = var_5126_transpose_y_0, x = var_5124_cast_fp16, y = v_235_cast_fp16)[name = string("op_5126_cast_fp16")]; + tensor var_5127 = const()[name = string("op_5127"), val = tensor([0, 2, 1, 3])]; + tensor concat_523x = const()[name = string("concat_523x"), val = tensor([1, -1, 1280])]; + tensor var_5128_cast_fp16 = transpose(perm = var_5127, x = var_5126_cast_fp16)[name = string("transpose_453")]; + tensor x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5128_cast_fp16)[name = string("x_421_cast_fp16")]; + tensor var_5132_to_fp16 = const()[name = string("op_5132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1204111872)))]; + tensor var_5133_to_fp16 = const()[name = string("op_5133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207388736)))]; + tensor linear_187_cast_fp16 = linear(bias = var_5133_to_fp16, weight = var_5132_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")]; + tensor var_5140_axes_0 = const()[name = string("op_5140_axes_0"), val = tensor([-1])]; + tensor blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207391360)))]; + tensor blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207393984)))]; + tensor var_5140_cast_fp16 = layer_norm(axes = var_5140_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5140_cast_fp16")]; + tensor var_5149_to_fp16 = const()[name = string("op_5149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1207396608)))]; + tensor var_5150_to_fp16 = const()[name = string("op_5150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210673472)))]; + tensor linear_188_cast_fp16 = linear(bias = var_5150_to_fp16, weight = var_5149_to_fp16, x = var_5140_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor concat_524 = const()[name = string("concat_524"), val = tensor([0, 0, 0])]; + tensor concat_525 = const()[name = string("concat_525"), val = tensor([0, 1500, 0])]; + tensor k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_526 = const()[name = string("concat_526"), val = tensor([0, 0, 0])]; + tensor concat_527 = const()[name = string("concat_527"), val = tensor([0, 1500, 0])]; + tensor v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_95_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, -1, 20, 64])]; + tensor var_5170_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5170_cast_fp16")]; + tensor const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_191_cast_fp16 = mul(x = var_5170_cast_fp16, y = const_254_to_fp16)[name = string("q_191_cast_fp16")]; + tensor var_5176 = const()[name = string("op_5176"), val = tensor([1, 1500, 20, -1])]; + tensor var_5177_cast_fp16 = reshape(shape = var_5176, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5177_cast_fp16")]; + tensor const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_239_cast_fp16 = mul(x = var_5177_cast_fp16, y = const_255_to_fp16)[name = string("k_239_cast_fp16")]; + tensor var_5183 = const()[name = string("op_5183"), val = tensor([1, 1500, 20, -1])]; + tensor var_5184_cast_fp16 = reshape(shape = var_5183, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5184_cast_fp16")]; + tensor var_5185 = const()[name = string("op_5185"), val = tensor([0, 2, 1, 3])]; + bool qk_143_transpose_x_0 = const()[name = string("qk_143_transpose_x_0"), val = bool(false)]; + bool qk_143_transpose_y_0 = const()[name = string("qk_143_transpose_y_0"), val = bool(false)]; + tensor transpose_351_perm_0 = const()[name = string("transpose_351_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_352_perm_0 = const()[name = string("transpose_352_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_352 = transpose(perm = transpose_352_perm_0, x = k_239_cast_fp16)[name = string("transpose_450")]; + tensor transpose_351 = transpose(perm = transpose_351_perm_0, x = q_191_cast_fp16)[name = string("transpose_451")]; + tensor qk_143_cast_fp16 = matmul(transpose_x = qk_143_transpose_x_0, transpose_y = qk_143_transpose_y_0, x = transpose_351, y = transpose_352)[name = string("qk_143_cast_fp16")]; + tensor var_5189_cast_fp16 = softmax(axis = var_5033, x = qk_143_cast_fp16)[name = string("op_5189_cast_fp16")]; + bool var_5191_transpose_x_0 = const()[name = string("op_5191_transpose_x_0"), val = bool(false)]; + bool var_5191_transpose_y_0 = const()[name = string("op_5191_transpose_y_0"), val = bool(false)]; + tensor v_239_cast_fp16 = transpose(perm = var_5185, x = var_5184_cast_fp16)[name = string("transpose_452")]; + tensor var_5191_cast_fp16 = matmul(transpose_x = var_5191_transpose_x_0, transpose_y = var_5191_transpose_y_0, x = var_5189_cast_fp16, y = v_239_cast_fp16)[name = string("op_5191_cast_fp16")]; + tensor var_5192 = const()[name = string("op_5192"), val = tensor([0, 2, 1, 3])]; + tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, -1, 1280])]; + tensor var_5193_cast_fp16 = transpose(perm = var_5192, x = var_5191_cast_fp16)[name = string("transpose_449")]; + tensor x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5193_cast_fp16)[name = string("x_427_cast_fp16")]; + tensor var_5197_to_fp16 = const()[name = string("op_5197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1210676096)))]; + tensor var_5198_to_fp16 = const()[name = string("op_5198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213952960)))]; + tensor linear_189_cast_fp16 = linear(bias = var_5198_to_fp16, weight = var_5197_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")]; + tensor var_5205_axes_0 = const()[name = string("op_5205_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213955584)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213958208)))]; + tensor var_5205_cast_fp16 = layer_norm(axes = var_5205_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5039_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5205_cast_fp16")]; + tensor var_5214_to_fp16 = const()[name = string("op_5214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1213960832)))]; + tensor var_5215_to_fp16 = const()[name = string("op_5215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227068096)))]; + tensor linear_190_cast_fp16 = linear(bias = var_5215_to_fp16, weight = var_5214_to_fp16, x = var_5205_cast_fp16)[name = string("linear_190_cast_fp16")]; + string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")]; + tensor x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")]; + tensor var_5220_to_fp16 = const()[name = string("op_5220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1227078400)))]; + tensor var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240185664)))]; + tensor linear_191_cast_fp16 = linear(bias = var_5221_to_fp16, weight = var_5220_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")]; + tensor k_cache_97_begin_0 = const()[name = string("k_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor k_cache_97_end_0 = const()[name = string("k_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; + tensor k_cache_97_end_mask_0 = const()[name = string("k_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_97_squeeze_mask_0 = const()[name = string("k_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_97_cast_fp16 = slice_by_index(begin = k_cache_97_begin_0, end = k_cache_97_end_0, end_mask = k_cache_97_end_mask_0, squeeze_mask = k_cache_97_squeeze_mask_0, x = coreml_update_state_110)[name = string("k_cache_97_cast_fp16")]; + tensor v_cache_97_begin_0 = const()[name = string("v_cache_97_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor v_cache_97_end_0 = const()[name = string("v_cache_97_end_0"), val = tensor([25, 1, 448, 1280])]; + tensor v_cache_97_end_mask_0 = const()[name = string("v_cache_97_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_97_squeeze_mask_0 = const()[name = string("v_cache_97_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_97_cast_fp16 = slice_by_index(begin = v_cache_97_begin_0, end = v_cache_97_end_0, end_mask = v_cache_97_end_mask_0, squeeze_mask = v_cache_97_squeeze_mask_0, x = coreml_update_state_111)[name = string("v_cache_97_cast_fp16")]; + tensor k_cache_99_begin_0 = const()[name = string("k_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor k_cache_99_end_0 = const()[name = string("k_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; + tensor k_cache_99_end_mask_0 = const()[name = string("k_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_99_squeeze_mask_0 = const()[name = string("k_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_99_cast_fp16 = slice_by_index(begin = k_cache_99_begin_0, end = k_cache_99_end_0, end_mask = k_cache_99_end_mask_0, squeeze_mask = k_cache_99_squeeze_mask_0, x = read_state_2)[name = string("k_cache_99_cast_fp16")]; + tensor v_cache_99_begin_0 = const()[name = string("v_cache_99_begin_0"), val = tensor([24, 0, 0, 0])]; + tensor v_cache_99_end_0 = const()[name = string("v_cache_99_end_0"), val = tensor([25, 1, 1500, 1280])]; + tensor v_cache_99_end_mask_0 = const()[name = string("v_cache_99_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_99_squeeze_mask_0 = const()[name = string("v_cache_99_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_99_cast_fp16 = slice_by_index(begin = v_cache_99_begin_0, end = v_cache_99_end_0, end_mask = v_cache_99_end_mask_0, squeeze_mask = v_cache_99_squeeze_mask_0, x = read_state_3)[name = string("v_cache_99_cast_fp16")]; + int32 var_5244 = const()[name = string("op_5244"), val = int32(-1)]; + tensor var_5262_axes_0 = const()[name = string("op_5262_axes_0"), val = tensor([-1])]; + tensor blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240188288)))]; + tensor blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240190912)))]; + fp16 var_5250_to_fp16 = const()[name = string("op_5250_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5262_cast_fp16 = layer_norm(axes = var_5262_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5262_cast_fp16")]; + tensor var_5273_to_fp16 = const()[name = string("op_5273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1240193536)))]; + tensor var_5274_to_fp16 = const()[name = string("op_5274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243470400)))]; + tensor linear_192_cast_fp16 = linear(bias = var_5274_to_fp16, weight = var_5273_to_fp16, x = var_5262_cast_fp16)[name = string("linear_192_cast_fp16")]; + tensor var_5277_to_fp16 = const()[name = string("op_5277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1243473024)))]; + tensor linear_193_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5277_to_fp16, x = var_5262_cast_fp16)[name = string("linear_193_cast_fp16")]; + tensor var_5281_to_fp16 = const()[name = string("op_5281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1246749888)))]; + tensor var_5282_to_fp16 = const()[name = string("op_5282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250026752)))]; + tensor linear_194_cast_fp16 = linear(bias = var_5282_to_fp16, weight = var_5281_to_fp16, x = var_5262_cast_fp16)[name = string("linear_194_cast_fp16")]; + tensor var_5284_shape_cast_fp16 = shape(x = linear_192_cast_fp16)[name = string("op_5284_shape_cast_fp16")]; + int32 gather_290_axis_0 = const()[name = string("gather_290_axis_0"), val = int32(0)]; + int32 gather_290_batch_dims_0 = const()[name = string("gather_290_batch_dims_0"), val = int32(0)]; + bool gather_290_validate_indices_0 = const()[name = string("gather_290_validate_indices_0"), val = bool(false)]; + string var_5284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_290_to_uint16 = const()[name = string("select_290_to_uint16"), val = uint16(1)]; + tensor var_5284_shape_cast_fp16_to_uint16 = cast(dtype = var_5284_shape_cast_fp16_to_uint16_dtype_0, x = var_5284_shape_cast_fp16)[name = string("cast_342")]; + uint16 gather_290_cast_uint16 = gather(axis = gather_290_axis_0, batch_dims = gather_290_batch_dims_0, indices = select_290_to_uint16, validate_indices = gather_290_validate_indices_0, x = var_5284_shape_cast_fp16_to_uint16)[name = string("gather_290_cast_uint16")]; + string gather_290_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_290_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_290_cast_uint16_to_int32 = cast(dtype = gather_290_cast_uint16_to_int32_dtype_0, x = gather_290_cast_uint16)[name = string("cast_341")]; + int32 end_step_51 = add(x = offset, y = gather_290_cast_uint16_to_int32)[name = string("end_step_51")]; + tensor expand_dims_384 = const()[name = string("expand_dims_384"), val = tensor([0])]; + tensor expand_dims_386 = const()[name = string("expand_dims_386"), val = tensor([0])]; + tensor expand_dims_387_axes_0 = const()[name = string("expand_dims_387_axes_0"), val = tensor([0])]; + tensor expand_dims_387 = expand_dims(axes = expand_dims_387_axes_0, x = end_step_51)[name = string("expand_dims_387")]; + tensor concat_532_values0_0 = const()[name = string("concat_532_values0_0"), val = tensor([24])]; + int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)]; + bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)]; + tensor concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (concat_532_values0_0, expand_dims_384, expand_dims_1, expand_dims_386))[name = string("concat_532")]; + tensor concat_533_values0_0 = const()[name = string("concat_533_values0_0"), val = tensor([0])]; + tensor concat_533_values1_0 = const()[name = string("concat_533_values1_0"), val = tensor([0])]; + tensor concat_533_values3_0 = const()[name = string("concat_533_values3_0"), val = tensor([0])]; + int32 concat_533_axis_0 = const()[name = string("concat_533_axis_0"), val = int32(0)]; + bool concat_533_interleave_0 = const()[name = string("concat_533_interleave_0"), val = bool(false)]; + tensor concat_533 = concat(axis = concat_533_axis_0, interleave = concat_533_interleave_0, values = (concat_533_values0_0, concat_533_values1_0, expand_dims_387, concat_533_values3_0))[name = string("concat_533")]; + tensor k_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = k_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = k_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_25_stride_0, update = linear_193_cast_fp16, x = coreml_update_state_110)[name = string("k_cache1_internal_tensor_assign_25_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_25_cast_fp16, input = k_cache1)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = k_cache1)[name = string("coreml_update_state_112")]; + tensor v_cache1_internal_tensor_assign_25_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_25_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_25_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_532, begin_mask = v_cache1_internal_tensor_assign_25_begin_mask_0, end = concat_533, end_mask = v_cache1_internal_tensor_assign_25_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_25_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_25_stride_0, update = linear_194_cast_fp16, x = coreml_update_state_111)[name = string("v_cache1_internal_tensor_assign_25_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_25_cast_fp16, input = v_cache1)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = v_cache1)[name = string("coreml_update_state_113")]; + int32 concat_538_values0_0 = const()[name = string("concat_538_values0_0"), val = int32(1)]; + int32 concat_538_values2_0 = const()[name = string("concat_538_values2_0"), val = int32(1280)]; + int32 concat_538_axis_0 = const()[name = string("concat_538_axis_0"), val = int32(0)]; + bool concat_538_interleave_0 = const()[name = string("concat_538_interleave_0"), val = bool(false)]; + tensor concat_538 = concat(axis = concat_538_axis_0, interleave = concat_538_interleave_0, values = (concat_538_values0_0, end_step_51, concat_538_values2_0))[name = string("concat_538")]; + tensor var_5300_begin_0 = const()[name = string("op_5300_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5300_end_mask_0 = const()[name = string("op_5300_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5300_cast_fp16 = slice_by_index(begin = var_5300_begin_0, end = concat_538, end_mask = var_5300_end_mask_0, x = k_cache_97_cast_fp16)[name = string("op_5300_cast_fp16")]; + tensor var_5303_begin_0 = const()[name = string("op_5303_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5303_end_mask_0 = const()[name = string("op_5303_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5303_cast_fp16 = slice_by_index(begin = var_5303_begin_0, end = concat_538, end_mask = var_5303_end_mask_0, x = v_cache_97_cast_fp16)[name = string("op_5303_cast_fp16")]; + tensor concat_540x = const()[name = string("concat_540x"), val = tensor([1, -1, 20, 64])]; + tensor var_5313_cast_fp16 = reshape(shape = concat_540x, x = linear_192_cast_fp16)[name = string("op_5313_cast_fp16")]; + tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_195_cast_fp16 = mul(x = var_5313_cast_fp16, y = const_256_to_fp16)[name = string("q_195_cast_fp16")]; + tensor concat_541x = const()[name = string("concat_541x"), val = tensor([1, -1, 20, 64])]; + tensor var_5320_cast_fp16 = reshape(shape = concat_541x, x = var_5300_cast_fp16)[name = string("op_5320_cast_fp16")]; + tensor const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_245_cast_fp16 = mul(x = var_5320_cast_fp16, y = const_257_to_fp16)[name = string("k_245_cast_fp16")]; + tensor concat_542x = const()[name = string("concat_542x"), val = tensor([1, -1, 20, 64])]; + tensor var_5327_cast_fp16 = reshape(shape = concat_542x, x = var_5303_cast_fp16)[name = string("op_5327_cast_fp16")]; + tensor var_5328 = const()[name = string("op_5328"), val = tensor([0, 2, 1, 3])]; + bool qk_145_transpose_x_0 = const()[name = string("qk_145_transpose_x_0"), val = bool(false)]; + bool qk_145_transpose_y_0 = const()[name = string("qk_145_transpose_y_0"), val = bool(false)]; + tensor transpose_353_perm_0 = const()[name = string("transpose_353_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_354_perm_0 = const()[name = string("transpose_354_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_354 = transpose(perm = transpose_354_perm_0, x = k_245_cast_fp16)[name = string("transpose_446")]; + tensor transpose_353 = transpose(perm = transpose_353_perm_0, x = q_195_cast_fp16)[name = string("transpose_447")]; + tensor qk_145_cast_fp16 = matmul(transpose_x = qk_145_transpose_x_0, transpose_y = qk_145_transpose_y_0, x = transpose_353, y = transpose_354)[name = string("qk_145_cast_fp16")]; + int32 concat_543_values1_0 = const()[name = string("concat_543_values1_0"), val = int32(448)]; + int32 concat_543_axis_0 = const()[name = string("concat_543_axis_0"), val = int32(0)]; + bool concat_543_interleave_0 = const()[name = string("concat_543_interleave_0"), val = bool(false)]; + tensor concat_543 = concat(axis = concat_543_axis_0, interleave = concat_543_interleave_0, values = (gather_290_cast_uint16_to_int32, concat_543_values1_0))[name = string("concat_543")]; + tensor var_5331_begin_0 = const()[name = string("op_5331_begin_0"), val = tensor([0, 0])]; + tensor var_5331_end_mask_0 = const()[name = string("op_5331_end_mask_0"), val = tensor([false, true])]; + tensor var_5331_cast_fp16 = slice_by_index(begin = var_5331_begin_0, end = concat_543, end_mask = var_5331_end_mask_0, x = mask_to_fp16)[name = string("op_5331_cast_fp16")]; + int32 concat_544_values0_0 = const()[name = string("concat_544_values0_0"), val = int32(0)]; + int32 concat_544_axis_0 = const()[name = string("concat_544_axis_0"), val = int32(0)]; + bool concat_544_interleave_0 = const()[name = string("concat_544_interleave_0"), val = bool(false)]; + tensor concat_544 = concat(axis = concat_544_axis_0, interleave = concat_544_interleave_0, values = (concat_544_values0_0, gather_290_cast_uint16_to_int32))[name = string("concat_544")]; + tensor var_5332_begin_0 = const()[name = string("op_5332_begin_0"), val = tensor([0, 0])]; + tensor var_5332_end_mask_0 = const()[name = string("op_5332_end_mask_0"), val = tensor([true, false])]; + tensor var_5332_cast_fp16 = slice_by_index(begin = var_5332_begin_0, end = concat_544, end_mask = var_5332_end_mask_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")]; + tensor qk_147_cast_fp16 = add(x = qk_145_cast_fp16, y = var_5332_cast_fp16)[name = string("qk_147_cast_fp16")]; + tensor var_5335_cast_fp16 = softmax(axis = var_5244, x = qk_147_cast_fp16)[name = string("op_5335_cast_fp16")]; + bool var_5337_transpose_x_0 = const()[name = string("op_5337_transpose_x_0"), val = bool(false)]; + bool var_5337_transpose_y_0 = const()[name = string("op_5337_transpose_y_0"), val = bool(false)]; + tensor v_245_cast_fp16 = transpose(perm = var_5328, x = var_5327_cast_fp16)[name = string("transpose_448")]; + tensor var_5337_cast_fp16 = matmul(transpose_x = var_5337_transpose_x_0, transpose_y = var_5337_transpose_y_0, x = var_5335_cast_fp16, y = v_245_cast_fp16)[name = string("op_5337_cast_fp16")]; + tensor var_5338 = const()[name = string("op_5338"), val = tensor([0, 2, 1, 3])]; + tensor concat_545x = const()[name = string("concat_545x"), val = tensor([1, -1, 1280])]; + tensor var_5339_cast_fp16 = transpose(perm = var_5338, x = var_5337_cast_fp16)[name = string("transpose_445")]; + tensor x_439_cast_fp16 = reshape(shape = concat_545x, x = var_5339_cast_fp16)[name = string("x_439_cast_fp16")]; + tensor var_5343_to_fp16 = const()[name = string("op_5343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1250029376)))]; + tensor var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253306240)))]; + tensor linear_195_cast_fp16 = linear(bias = var_5344_to_fp16, weight = var_5343_to_fp16, x = x_439_cast_fp16)[name = string("linear_195_cast_fp16")]; + tensor x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_195_cast_fp16)[name = string("x_441_cast_fp16")]; + tensor var_5351_axes_0 = const()[name = string("op_5351_axes_0"), val = tensor([-1])]; + tensor blocks_24_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253308864)))]; + tensor blocks_24_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253311488)))]; + tensor var_5351_cast_fp16 = layer_norm(axes = var_5351_axes_0, beta = blocks_24_cross_attn_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_cross_attn_ln_weight_to_fp16, x = x_441_cast_fp16)[name = string("op_5351_cast_fp16")]; + tensor var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1253314112)))]; + tensor var_5361_to_fp16 = const()[name = string("op_5361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256590976)))]; + tensor linear_196_cast_fp16 = linear(bias = var_5361_to_fp16, weight = var_5360_to_fp16, x = var_5351_cast_fp16)[name = string("linear_196_cast_fp16")]; + tensor concat_546 = const()[name = string("concat_546"), val = tensor([0, 0, 0])]; + tensor concat_547 = const()[name = string("concat_547"), val = tensor([0, 1500, 0])]; + tensor k_247_internal_tensor_assign_1_stride_0 = const()[name = string("k_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_546, begin_mask = k_247_internal_tensor_assign_1_begin_mask_0, end = concat_547, end_mask = k_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_247_internal_tensor_assign_1_squeeze_mask_0, stride = k_247_internal_tensor_assign_1_stride_0, update = k_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("k_247_internal_tensor_assign_1_cast_fp16")]; + tensor concat_548 = const()[name = string("concat_548"), val = tensor([0, 0, 0])]; + tensor concat_549 = const()[name = string("concat_549"), val = tensor([0, 1500, 0])]; + tensor v_247_internal_tensor_assign_1_stride_0 = const()[name = string("v_247_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_247_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_247_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_247_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_247_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_247_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_548, begin_mask = v_247_internal_tensor_assign_1_begin_mask_0, end = concat_549, end_mask = v_247_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_247_internal_tensor_assign_1_squeeze_mask_0, stride = v_247_internal_tensor_assign_1_stride_0, update = v_cache_99_cast_fp16, x = k_7_to_fp16)[name = string("v_247_internal_tensor_assign_1_cast_fp16")]; + tensor concat_550x = const()[name = string("concat_550x"), val = tensor([1, -1, 20, 64])]; + tensor var_5381_cast_fp16 = reshape(shape = concat_550x, x = linear_196_cast_fp16)[name = string("op_5381_cast_fp16")]; + tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_199_cast_fp16 = mul(x = var_5381_cast_fp16, y = const_258_to_fp16)[name = string("q_199_cast_fp16")]; + tensor var_5387 = const()[name = string("op_5387"), val = tensor([1, 1500, 20, -1])]; + tensor var_5388_cast_fp16 = reshape(shape = var_5387, x = k_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5388_cast_fp16")]; + tensor const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_249_cast_fp16 = mul(x = var_5388_cast_fp16, y = const_259_to_fp16)[name = string("k_249_cast_fp16")]; + tensor var_5394 = const()[name = string("op_5394"), val = tensor([1, 1500, 20, -1])]; + tensor var_5395_cast_fp16 = reshape(shape = var_5394, x = v_247_internal_tensor_assign_1_cast_fp16)[name = string("op_5395_cast_fp16")]; + tensor var_5396 = const()[name = string("op_5396"), val = tensor([0, 2, 1, 3])]; + bool qk_149_transpose_x_0 = const()[name = string("qk_149_transpose_x_0"), val = bool(false)]; + bool qk_149_transpose_y_0 = const()[name = string("qk_149_transpose_y_0"), val = bool(false)]; + tensor transpose_355_perm_0 = const()[name = string("transpose_355_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_356_perm_0 = const()[name = string("transpose_356_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_356 = transpose(perm = transpose_356_perm_0, x = k_249_cast_fp16)[name = string("transpose_442")]; + tensor transpose_355 = transpose(perm = transpose_355_perm_0, x = q_199_cast_fp16)[name = string("transpose_443")]; + tensor qk_149_cast_fp16 = matmul(transpose_x = qk_149_transpose_x_0, transpose_y = qk_149_transpose_y_0, x = transpose_355, y = transpose_356)[name = string("qk_149_cast_fp16")]; + tensor var_5400_cast_fp16 = softmax(axis = var_5244, x = qk_149_cast_fp16)[name = string("op_5400_cast_fp16")]; + bool var_5402_transpose_x_0 = const()[name = string("op_5402_transpose_x_0"), val = bool(false)]; + bool var_5402_transpose_y_0 = const()[name = string("op_5402_transpose_y_0"), val = bool(false)]; + tensor v_249_cast_fp16 = transpose(perm = var_5396, x = var_5395_cast_fp16)[name = string("transpose_444")]; + tensor var_5402_cast_fp16 = matmul(transpose_x = var_5402_transpose_x_0, transpose_y = var_5402_transpose_y_0, x = var_5400_cast_fp16, y = v_249_cast_fp16)[name = string("op_5402_cast_fp16")]; + tensor var_5403 = const()[name = string("op_5403"), val = tensor([0, 2, 1, 3])]; + tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 1280])]; + tensor var_5404_cast_fp16 = transpose(perm = var_5403, x = var_5402_cast_fp16)[name = string("transpose_441")]; + tensor x_445_cast_fp16 = reshape(shape = concat_551x, x = var_5404_cast_fp16)[name = string("x_445_cast_fp16")]; + tensor var_5408_to_fp16 = const()[name = string("op_5408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1256593600)))]; + tensor var_5409_to_fp16 = const()[name = string("op_5409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259870464)))]; + tensor linear_197_cast_fp16 = linear(bias = var_5409_to_fp16, weight = var_5408_to_fp16, x = x_445_cast_fp16)[name = string("linear_197_cast_fp16")]; + tensor x_447_cast_fp16 = add(x = x_441_cast_fp16, y = linear_197_cast_fp16)[name = string("x_447_cast_fp16")]; + tensor var_5416_axes_0 = const()[name = string("op_5416_axes_0"), val = tensor([-1])]; + tensor blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259873088)))]; + tensor blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259875712)))]; + tensor var_5416_cast_fp16 = layer_norm(axes = var_5416_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_5250_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_447_cast_fp16)[name = string("op_5416_cast_fp16")]; + tensor var_5425_to_fp16 = const()[name = string("op_5425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1259878336)))]; + tensor var_5426_to_fp16 = const()[name = string("op_5426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272985600)))]; + tensor linear_198_cast_fp16 = linear(bias = var_5426_to_fp16, weight = var_5425_to_fp16, x = var_5416_cast_fp16)[name = string("linear_198_cast_fp16")]; + string x_451_mode_0 = const()[name = string("x_451_mode_0"), val = string("EXACT")]; + tensor x_451_cast_fp16 = gelu(mode = x_451_mode_0, x = linear_198_cast_fp16)[name = string("x_451_cast_fp16")]; + tensor var_5431_to_fp16 = const()[name = string("op_5431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1272995904)))]; + tensor var_5432_to_fp16 = const()[name = string("op_5432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286103168)))]; + tensor linear_199_cast_fp16 = linear(bias = var_5432_to_fp16, weight = var_5431_to_fp16, x = x_451_cast_fp16)[name = string("linear_199_cast_fp16")]; + tensor x_453_cast_fp16 = add(x = x_447_cast_fp16, y = linear_199_cast_fp16)[name = string("x_453_cast_fp16")]; + tensor k_cache_101_begin_0 = const()[name = string("k_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor k_cache_101_end_0 = const()[name = string("k_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; + tensor k_cache_101_end_mask_0 = const()[name = string("k_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_101_squeeze_mask_0 = const()[name = string("k_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_101_cast_fp16 = slice_by_index(begin = k_cache_101_begin_0, end = k_cache_101_end_0, end_mask = k_cache_101_end_mask_0, squeeze_mask = k_cache_101_squeeze_mask_0, x = coreml_update_state_112)[name = string("k_cache_101_cast_fp16")]; + tensor v_cache_101_begin_0 = const()[name = string("v_cache_101_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor v_cache_101_end_0 = const()[name = string("v_cache_101_end_0"), val = tensor([26, 1, 448, 1280])]; + tensor v_cache_101_end_mask_0 = const()[name = string("v_cache_101_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_101_squeeze_mask_0 = const()[name = string("v_cache_101_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_101_cast_fp16 = slice_by_index(begin = v_cache_101_begin_0, end = v_cache_101_end_0, end_mask = v_cache_101_end_mask_0, squeeze_mask = v_cache_101_squeeze_mask_0, x = coreml_update_state_113)[name = string("v_cache_101_cast_fp16")]; + tensor k_cache_103_begin_0 = const()[name = string("k_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor k_cache_103_end_0 = const()[name = string("k_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; + tensor k_cache_103_end_mask_0 = const()[name = string("k_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_103_squeeze_mask_0 = const()[name = string("k_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_103_cast_fp16 = slice_by_index(begin = k_cache_103_begin_0, end = k_cache_103_end_0, end_mask = k_cache_103_end_mask_0, squeeze_mask = k_cache_103_squeeze_mask_0, x = read_state_2)[name = string("k_cache_103_cast_fp16")]; + tensor v_cache_103_begin_0 = const()[name = string("v_cache_103_begin_0"), val = tensor([25, 0, 0, 0])]; + tensor v_cache_103_end_0 = const()[name = string("v_cache_103_end_0"), val = tensor([26, 1, 1500, 1280])]; + tensor v_cache_103_end_mask_0 = const()[name = string("v_cache_103_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_103_squeeze_mask_0 = const()[name = string("v_cache_103_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_103_cast_fp16 = slice_by_index(begin = v_cache_103_begin_0, end = v_cache_103_end_0, end_mask = v_cache_103_end_mask_0, squeeze_mask = v_cache_103_squeeze_mask_0, x = read_state_3)[name = string("v_cache_103_cast_fp16")]; + int32 var_5455 = const()[name = string("op_5455"), val = int32(-1)]; + tensor var_5473_axes_0 = const()[name = string("op_5473_axes_0"), val = tensor([-1])]; + tensor blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286105792)))]; + tensor blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286108416)))]; + fp16 var_5461_to_fp16 = const()[name = string("op_5461_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5473_cast_fp16 = layer_norm(axes = var_5473_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_453_cast_fp16)[name = string("op_5473_cast_fp16")]; + tensor var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1286111040)))]; + tensor var_5485_to_fp16 = const()[name = string("op_5485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289387904)))]; + tensor linear_200_cast_fp16 = linear(bias = var_5485_to_fp16, weight = var_5484_to_fp16, x = var_5473_cast_fp16)[name = string("linear_200_cast_fp16")]; + tensor var_5488_to_fp16 = const()[name = string("op_5488_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1289390528)))]; + tensor linear_201_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5488_to_fp16, x = var_5473_cast_fp16)[name = string("linear_201_cast_fp16")]; + tensor var_5492_to_fp16 = const()[name = string("op_5492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1292667392)))]; + tensor var_5493_to_fp16 = const()[name = string("op_5493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295944256)))]; + tensor linear_202_cast_fp16 = linear(bias = var_5493_to_fp16, weight = var_5492_to_fp16, x = var_5473_cast_fp16)[name = string("linear_202_cast_fp16")]; + tensor var_5495_shape_cast_fp16 = shape(x = linear_200_cast_fp16)[name = string("op_5495_shape_cast_fp16")]; + int32 gather_302_axis_0 = const()[name = string("gather_302_axis_0"), val = int32(0)]; + int32 gather_302_batch_dims_0 = const()[name = string("gather_302_batch_dims_0"), val = int32(0)]; + bool gather_302_validate_indices_0 = const()[name = string("gather_302_validate_indices_0"), val = bool(false)]; + string var_5495_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5495_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_302_to_uint16 = const()[name = string("select_302_to_uint16"), val = uint16(1)]; + tensor var_5495_shape_cast_fp16_to_uint16 = cast(dtype = var_5495_shape_cast_fp16_to_uint16_dtype_0, x = var_5495_shape_cast_fp16)[name = string("cast_340")]; + uint16 gather_302_cast_uint16 = gather(axis = gather_302_axis_0, batch_dims = gather_302_batch_dims_0, indices = select_302_to_uint16, validate_indices = gather_302_validate_indices_0, x = var_5495_shape_cast_fp16_to_uint16)[name = string("gather_302_cast_uint16")]; + string gather_302_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_302_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_302_cast_uint16_to_int32 = cast(dtype = gather_302_cast_uint16_to_int32_dtype_0, x = gather_302_cast_uint16)[name = string("cast_339")]; + int32 end_step_53 = add(x = offset, y = gather_302_cast_uint16_to_int32)[name = string("end_step_53")]; + tensor expand_dims_400 = const()[name = string("expand_dims_400"), val = tensor([0])]; + tensor expand_dims_402 = const()[name = string("expand_dims_402"), val = tensor([0])]; + tensor expand_dims_403_axes_0 = const()[name = string("expand_dims_403_axes_0"), val = tensor([0])]; + tensor expand_dims_403 = expand_dims(axes = expand_dims_403_axes_0, x = end_step_53)[name = string("expand_dims_403")]; + tensor concat_554_values0_0 = const()[name = string("concat_554_values0_0"), val = tensor([25])]; + int32 concat_554_axis_0 = const()[name = string("concat_554_axis_0"), val = int32(0)]; + bool concat_554_interleave_0 = const()[name = string("concat_554_interleave_0"), val = bool(false)]; + tensor concat_554 = concat(axis = concat_554_axis_0, interleave = concat_554_interleave_0, values = (concat_554_values0_0, expand_dims_400, expand_dims_1, expand_dims_402))[name = string("concat_554")]; + tensor concat_555_values0_0 = const()[name = string("concat_555_values0_0"), val = tensor([0])]; + tensor concat_555_values1_0 = const()[name = string("concat_555_values1_0"), val = tensor([0])]; + tensor concat_555_values3_0 = const()[name = string("concat_555_values3_0"), val = tensor([0])]; + int32 concat_555_axis_0 = const()[name = string("concat_555_axis_0"), val = int32(0)]; + bool concat_555_interleave_0 = const()[name = string("concat_555_interleave_0"), val = bool(false)]; + tensor concat_555 = concat(axis = concat_555_axis_0, interleave = concat_555_interleave_0, values = (concat_555_values0_0, concat_555_values1_0, expand_dims_403, concat_555_values3_0))[name = string("concat_555")]; + tensor k_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = k_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = k_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_26_stride_0, update = linear_201_cast_fp16, x = coreml_update_state_112)[name = string("k_cache1_internal_tensor_assign_26_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_26_cast_fp16, input = k_cache1)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = k_cache1)[name = string("coreml_update_state_114")]; + tensor v_cache1_internal_tensor_assign_26_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_26_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_26_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_554, begin_mask = v_cache1_internal_tensor_assign_26_begin_mask_0, end = concat_555, end_mask = v_cache1_internal_tensor_assign_26_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_26_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_26_stride_0, update = linear_202_cast_fp16, x = coreml_update_state_113)[name = string("v_cache1_internal_tensor_assign_26_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_26_cast_fp16, input = v_cache1)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = v_cache1)[name = string("coreml_update_state_115")]; + int32 concat_560_values0_0 = const()[name = string("concat_560_values0_0"), val = int32(1)]; + int32 concat_560_values2_0 = const()[name = string("concat_560_values2_0"), val = int32(1280)]; + int32 concat_560_axis_0 = const()[name = string("concat_560_axis_0"), val = int32(0)]; + bool concat_560_interleave_0 = const()[name = string("concat_560_interleave_0"), val = bool(false)]; + tensor concat_560 = concat(axis = concat_560_axis_0, interleave = concat_560_interleave_0, values = (concat_560_values0_0, end_step_53, concat_560_values2_0))[name = string("concat_560")]; + tensor var_5511_begin_0 = const()[name = string("op_5511_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5511_end_mask_0 = const()[name = string("op_5511_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5511_cast_fp16 = slice_by_index(begin = var_5511_begin_0, end = concat_560, end_mask = var_5511_end_mask_0, x = k_cache_101_cast_fp16)[name = string("op_5511_cast_fp16")]; + tensor var_5514_begin_0 = const()[name = string("op_5514_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5514_end_mask_0 = const()[name = string("op_5514_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = concat_560, end_mask = var_5514_end_mask_0, x = v_cache_101_cast_fp16)[name = string("op_5514_cast_fp16")]; + tensor concat_562x = const()[name = string("concat_562x"), val = tensor([1, -1, 20, 64])]; + tensor var_5524_cast_fp16 = reshape(shape = concat_562x, x = linear_200_cast_fp16)[name = string("op_5524_cast_fp16")]; + tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_203_cast_fp16 = mul(x = var_5524_cast_fp16, y = const_260_to_fp16)[name = string("q_203_cast_fp16")]; + tensor concat_563x = const()[name = string("concat_563x"), val = tensor([1, -1, 20, 64])]; + tensor var_5531_cast_fp16 = reshape(shape = concat_563x, x = var_5511_cast_fp16)[name = string("op_5531_cast_fp16")]; + tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_255_cast_fp16 = mul(x = var_5531_cast_fp16, y = const_261_to_fp16)[name = string("k_255_cast_fp16")]; + tensor concat_564x = const()[name = string("concat_564x"), val = tensor([1, -1, 20, 64])]; + tensor var_5538_cast_fp16 = reshape(shape = concat_564x, x = var_5514_cast_fp16)[name = string("op_5538_cast_fp16")]; + tensor var_5539 = const()[name = string("op_5539"), val = tensor([0, 2, 1, 3])]; + bool qk_151_transpose_x_0 = const()[name = string("qk_151_transpose_x_0"), val = bool(false)]; + bool qk_151_transpose_y_0 = const()[name = string("qk_151_transpose_y_0"), val = bool(false)]; + tensor transpose_357_perm_0 = const()[name = string("transpose_357_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_358_perm_0 = const()[name = string("transpose_358_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_358 = transpose(perm = transpose_358_perm_0, x = k_255_cast_fp16)[name = string("transpose_438")]; + tensor transpose_357 = transpose(perm = transpose_357_perm_0, x = q_203_cast_fp16)[name = string("transpose_439")]; + tensor qk_151_cast_fp16 = matmul(transpose_x = qk_151_transpose_x_0, transpose_y = qk_151_transpose_y_0, x = transpose_357, y = transpose_358)[name = string("qk_151_cast_fp16")]; + int32 concat_565_values1_0 = const()[name = string("concat_565_values1_0"), val = int32(448)]; + int32 concat_565_axis_0 = const()[name = string("concat_565_axis_0"), val = int32(0)]; + bool concat_565_interleave_0 = const()[name = string("concat_565_interleave_0"), val = bool(false)]; + tensor concat_565 = concat(axis = concat_565_axis_0, interleave = concat_565_interleave_0, values = (gather_302_cast_uint16_to_int32, concat_565_values1_0))[name = string("concat_565")]; + tensor var_5542_begin_0 = const()[name = string("op_5542_begin_0"), val = tensor([0, 0])]; + tensor var_5542_end_mask_0 = const()[name = string("op_5542_end_mask_0"), val = tensor([false, true])]; + tensor var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = concat_565, end_mask = var_5542_end_mask_0, x = mask_to_fp16)[name = string("op_5542_cast_fp16")]; + int32 concat_566_values0_0 = const()[name = string("concat_566_values0_0"), val = int32(0)]; + int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; + bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; + tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (concat_566_values0_0, gather_302_cast_uint16_to_int32))[name = string("concat_566")]; + tensor var_5543_begin_0 = const()[name = string("op_5543_begin_0"), val = tensor([0, 0])]; + tensor var_5543_end_mask_0 = const()[name = string("op_5543_end_mask_0"), val = tensor([true, false])]; + tensor var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = concat_566, end_mask = var_5543_end_mask_0, x = var_5542_cast_fp16)[name = string("op_5543_cast_fp16")]; + tensor qk_153_cast_fp16 = add(x = qk_151_cast_fp16, y = var_5543_cast_fp16)[name = string("qk_153_cast_fp16")]; + tensor var_5546_cast_fp16 = softmax(axis = var_5455, x = qk_153_cast_fp16)[name = string("op_5546_cast_fp16")]; + bool var_5548_transpose_x_0 = const()[name = string("op_5548_transpose_x_0"), val = bool(false)]; + bool var_5548_transpose_y_0 = const()[name = string("op_5548_transpose_y_0"), val = bool(false)]; + tensor v_255_cast_fp16 = transpose(perm = var_5539, x = var_5538_cast_fp16)[name = string("transpose_440")]; + tensor var_5548_cast_fp16 = matmul(transpose_x = var_5548_transpose_x_0, transpose_y = var_5548_transpose_y_0, x = var_5546_cast_fp16, y = v_255_cast_fp16)[name = string("op_5548_cast_fp16")]; + tensor var_5549 = const()[name = string("op_5549"), val = tensor([0, 2, 1, 3])]; + tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, -1, 1280])]; + tensor var_5550_cast_fp16 = transpose(perm = var_5549, x = var_5548_cast_fp16)[name = string("transpose_437")]; + tensor x_457_cast_fp16 = reshape(shape = concat_567x, x = var_5550_cast_fp16)[name = string("x_457_cast_fp16")]; + tensor var_5554_to_fp16 = const()[name = string("op_5554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1295946880)))]; + tensor var_5555_to_fp16 = const()[name = string("op_5555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299223744)))]; + tensor linear_203_cast_fp16 = linear(bias = var_5555_to_fp16, weight = var_5554_to_fp16, x = x_457_cast_fp16)[name = string("linear_203_cast_fp16")]; + tensor x_459_cast_fp16 = add(x = x_453_cast_fp16, y = linear_203_cast_fp16)[name = string("x_459_cast_fp16")]; + tensor var_5562_axes_0 = const()[name = string("op_5562_axes_0"), val = tensor([-1])]; + tensor blocks_25_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299226368)))]; + tensor blocks_25_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299228992)))]; + tensor var_5562_cast_fp16 = layer_norm(axes = var_5562_axes_0, beta = blocks_25_cross_attn_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_cross_attn_ln_weight_to_fp16, x = x_459_cast_fp16)[name = string("op_5562_cast_fp16")]; + tensor var_5571_to_fp16 = const()[name = string("op_5571_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1299231616)))]; + tensor var_5572_to_fp16 = const()[name = string("op_5572_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302508480)))]; + tensor linear_204_cast_fp16 = linear(bias = var_5572_to_fp16, weight = var_5571_to_fp16, x = var_5562_cast_fp16)[name = string("linear_204_cast_fp16")]; + tensor concat_568 = const()[name = string("concat_568"), val = tensor([0, 0, 0])]; + tensor concat_569 = const()[name = string("concat_569"), val = tensor([0, 1500, 0])]; + tensor k_257_internal_tensor_assign_1_stride_0 = const()[name = string("k_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_568, begin_mask = k_257_internal_tensor_assign_1_begin_mask_0, end = concat_569, end_mask = k_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_257_internal_tensor_assign_1_squeeze_mask_0, stride = k_257_internal_tensor_assign_1_stride_0, update = k_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("k_257_internal_tensor_assign_1_cast_fp16")]; + tensor concat_570 = const()[name = string("concat_570"), val = tensor([0, 0, 0])]; + tensor concat_571 = const()[name = string("concat_571"), val = tensor([0, 1500, 0])]; + tensor v_257_internal_tensor_assign_1_stride_0 = const()[name = string("v_257_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_257_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_257_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_257_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_257_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_257_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_570, begin_mask = v_257_internal_tensor_assign_1_begin_mask_0, end = concat_571, end_mask = v_257_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_257_internal_tensor_assign_1_squeeze_mask_0, stride = v_257_internal_tensor_assign_1_stride_0, update = v_cache_103_cast_fp16, x = k_7_to_fp16)[name = string("v_257_internal_tensor_assign_1_cast_fp16")]; + tensor concat_572x = const()[name = string("concat_572x"), val = tensor([1, -1, 20, 64])]; + tensor var_5592_cast_fp16 = reshape(shape = concat_572x, x = linear_204_cast_fp16)[name = string("op_5592_cast_fp16")]; + tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_207_cast_fp16 = mul(x = var_5592_cast_fp16, y = const_262_to_fp16)[name = string("q_207_cast_fp16")]; + tensor var_5598 = const()[name = string("op_5598"), val = tensor([1, 1500, 20, -1])]; + tensor var_5599_cast_fp16 = reshape(shape = var_5598, x = k_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5599_cast_fp16")]; + tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_259_cast_fp16 = mul(x = var_5599_cast_fp16, y = const_263_to_fp16)[name = string("k_259_cast_fp16")]; + tensor var_5605 = const()[name = string("op_5605"), val = tensor([1, 1500, 20, -1])]; + tensor var_5606_cast_fp16 = reshape(shape = var_5605, x = v_257_internal_tensor_assign_1_cast_fp16)[name = string("op_5606_cast_fp16")]; + tensor var_5607 = const()[name = string("op_5607"), val = tensor([0, 2, 1, 3])]; + bool qk_155_transpose_x_0 = const()[name = string("qk_155_transpose_x_0"), val = bool(false)]; + bool qk_155_transpose_y_0 = const()[name = string("qk_155_transpose_y_0"), val = bool(false)]; + tensor transpose_359_perm_0 = const()[name = string("transpose_359_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_360_perm_0 = const()[name = string("transpose_360_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_360 = transpose(perm = transpose_360_perm_0, x = k_259_cast_fp16)[name = string("transpose_434")]; + tensor transpose_359 = transpose(perm = transpose_359_perm_0, x = q_207_cast_fp16)[name = string("transpose_435")]; + tensor qk_155_cast_fp16 = matmul(transpose_x = qk_155_transpose_x_0, transpose_y = qk_155_transpose_y_0, x = transpose_359, y = transpose_360)[name = string("qk_155_cast_fp16")]; + tensor var_5611_cast_fp16 = softmax(axis = var_5455, x = qk_155_cast_fp16)[name = string("op_5611_cast_fp16")]; + bool var_5613_transpose_x_0 = const()[name = string("op_5613_transpose_x_0"), val = bool(false)]; + bool var_5613_transpose_y_0 = const()[name = string("op_5613_transpose_y_0"), val = bool(false)]; + tensor v_259_cast_fp16 = transpose(perm = var_5607, x = var_5606_cast_fp16)[name = string("transpose_436")]; + tensor var_5613_cast_fp16 = matmul(transpose_x = var_5613_transpose_x_0, transpose_y = var_5613_transpose_y_0, x = var_5611_cast_fp16, y = v_259_cast_fp16)[name = string("op_5613_cast_fp16")]; + tensor var_5614 = const()[name = string("op_5614"), val = tensor([0, 2, 1, 3])]; + tensor concat_573x = const()[name = string("concat_573x"), val = tensor([1, -1, 1280])]; + tensor var_5615_cast_fp16 = transpose(perm = var_5614, x = var_5613_cast_fp16)[name = string("transpose_433")]; + tensor x_463_cast_fp16 = reshape(shape = concat_573x, x = var_5615_cast_fp16)[name = string("x_463_cast_fp16")]; + tensor var_5619_to_fp16 = const()[name = string("op_5619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1302511104)))]; + tensor var_5620_to_fp16 = const()[name = string("op_5620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305787968)))]; + tensor linear_205_cast_fp16 = linear(bias = var_5620_to_fp16, weight = var_5619_to_fp16, x = x_463_cast_fp16)[name = string("linear_205_cast_fp16")]; + tensor x_465_cast_fp16 = add(x = x_459_cast_fp16, y = linear_205_cast_fp16)[name = string("x_465_cast_fp16")]; + tensor var_5627_axes_0 = const()[name = string("op_5627_axes_0"), val = tensor([-1])]; + tensor blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305790592)))]; + tensor blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305793216)))]; + tensor var_5627_cast_fp16 = layer_norm(axes = var_5627_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_5461_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_465_cast_fp16)[name = string("op_5627_cast_fp16")]; + tensor var_5636_to_fp16 = const()[name = string("op_5636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1305795840)))]; + tensor var_5637_to_fp16 = const()[name = string("op_5637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318903104)))]; + tensor linear_206_cast_fp16 = linear(bias = var_5637_to_fp16, weight = var_5636_to_fp16, x = var_5627_cast_fp16)[name = string("linear_206_cast_fp16")]; + string x_469_mode_0 = const()[name = string("x_469_mode_0"), val = string("EXACT")]; + tensor x_469_cast_fp16 = gelu(mode = x_469_mode_0, x = linear_206_cast_fp16)[name = string("x_469_cast_fp16")]; + tensor var_5642_to_fp16 = const()[name = string("op_5642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1318913408)))]; + tensor var_5643_to_fp16 = const()[name = string("op_5643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332020672)))]; + tensor linear_207_cast_fp16 = linear(bias = var_5643_to_fp16, weight = var_5642_to_fp16, x = x_469_cast_fp16)[name = string("linear_207_cast_fp16")]; + tensor x_471_cast_fp16 = add(x = x_465_cast_fp16, y = linear_207_cast_fp16)[name = string("x_471_cast_fp16")]; + tensor k_cache_105_begin_0 = const()[name = string("k_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor k_cache_105_end_0 = const()[name = string("k_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; + tensor k_cache_105_end_mask_0 = const()[name = string("k_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_105_squeeze_mask_0 = const()[name = string("k_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_105_cast_fp16 = slice_by_index(begin = k_cache_105_begin_0, end = k_cache_105_end_0, end_mask = k_cache_105_end_mask_0, squeeze_mask = k_cache_105_squeeze_mask_0, x = coreml_update_state_114)[name = string("k_cache_105_cast_fp16")]; + tensor v_cache_105_begin_0 = const()[name = string("v_cache_105_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor v_cache_105_end_0 = const()[name = string("v_cache_105_end_0"), val = tensor([27, 1, 448, 1280])]; + tensor v_cache_105_end_mask_0 = const()[name = string("v_cache_105_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_105_squeeze_mask_0 = const()[name = string("v_cache_105_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_105_cast_fp16 = slice_by_index(begin = v_cache_105_begin_0, end = v_cache_105_end_0, end_mask = v_cache_105_end_mask_0, squeeze_mask = v_cache_105_squeeze_mask_0, x = coreml_update_state_115)[name = string("v_cache_105_cast_fp16")]; + tensor k_cache_107_begin_0 = const()[name = string("k_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor k_cache_107_end_0 = const()[name = string("k_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; + tensor k_cache_107_end_mask_0 = const()[name = string("k_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_107_squeeze_mask_0 = const()[name = string("k_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_107_cast_fp16 = slice_by_index(begin = k_cache_107_begin_0, end = k_cache_107_end_0, end_mask = k_cache_107_end_mask_0, squeeze_mask = k_cache_107_squeeze_mask_0, x = read_state_2)[name = string("k_cache_107_cast_fp16")]; + tensor v_cache_107_begin_0 = const()[name = string("v_cache_107_begin_0"), val = tensor([26, 0, 0, 0])]; + tensor v_cache_107_end_0 = const()[name = string("v_cache_107_end_0"), val = tensor([27, 1, 1500, 1280])]; + tensor v_cache_107_end_mask_0 = const()[name = string("v_cache_107_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_107_squeeze_mask_0 = const()[name = string("v_cache_107_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_107_cast_fp16 = slice_by_index(begin = v_cache_107_begin_0, end = v_cache_107_end_0, end_mask = v_cache_107_end_mask_0, squeeze_mask = v_cache_107_squeeze_mask_0, x = read_state_3)[name = string("v_cache_107_cast_fp16")]; + int32 var_5666 = const()[name = string("op_5666"), val = int32(-1)]; + tensor var_5684_axes_0 = const()[name = string("op_5684_axes_0"), val = tensor([-1])]; + tensor blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332023296)))]; + tensor blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332025920)))]; + fp16 var_5672_to_fp16 = const()[name = string("op_5672_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5684_cast_fp16 = layer_norm(axes = var_5684_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_471_cast_fp16)[name = string("op_5684_cast_fp16")]; + tensor var_5695_to_fp16 = const()[name = string("op_5695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1332028544)))]; + tensor var_5696_to_fp16 = const()[name = string("op_5696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335305408)))]; + tensor linear_208_cast_fp16 = linear(bias = var_5696_to_fp16, weight = var_5695_to_fp16, x = var_5684_cast_fp16)[name = string("linear_208_cast_fp16")]; + tensor var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1335308032)))]; + tensor linear_209_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5699_to_fp16, x = var_5684_cast_fp16)[name = string("linear_209_cast_fp16")]; + tensor var_5703_to_fp16 = const()[name = string("op_5703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1338584896)))]; + tensor var_5704_to_fp16 = const()[name = string("op_5704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341861760)))]; + tensor linear_210_cast_fp16 = linear(bias = var_5704_to_fp16, weight = var_5703_to_fp16, x = var_5684_cast_fp16)[name = string("linear_210_cast_fp16")]; + tensor var_5706_shape_cast_fp16 = shape(x = linear_208_cast_fp16)[name = string("op_5706_shape_cast_fp16")]; + int32 gather_314_axis_0 = const()[name = string("gather_314_axis_0"), val = int32(0)]; + int32 gather_314_batch_dims_0 = const()[name = string("gather_314_batch_dims_0"), val = int32(0)]; + bool gather_314_validate_indices_0 = const()[name = string("gather_314_validate_indices_0"), val = bool(false)]; + string var_5706_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5706_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_314_to_uint16 = const()[name = string("select_314_to_uint16"), val = uint16(1)]; + tensor var_5706_shape_cast_fp16_to_uint16 = cast(dtype = var_5706_shape_cast_fp16_to_uint16_dtype_0, x = var_5706_shape_cast_fp16)[name = string("cast_338")]; + uint16 gather_314_cast_uint16 = gather(axis = gather_314_axis_0, batch_dims = gather_314_batch_dims_0, indices = select_314_to_uint16, validate_indices = gather_314_validate_indices_0, x = var_5706_shape_cast_fp16_to_uint16)[name = string("gather_314_cast_uint16")]; + string gather_314_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_314_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_314_cast_uint16_to_int32 = cast(dtype = gather_314_cast_uint16_to_int32_dtype_0, x = gather_314_cast_uint16)[name = string("cast_337")]; + int32 end_step_55 = add(x = offset, y = gather_314_cast_uint16_to_int32)[name = string("end_step_55")]; + tensor expand_dims_416 = const()[name = string("expand_dims_416"), val = tensor([0])]; + tensor expand_dims_418 = const()[name = string("expand_dims_418"), val = tensor([0])]; + tensor expand_dims_419_axes_0 = const()[name = string("expand_dims_419_axes_0"), val = tensor([0])]; + tensor expand_dims_419 = expand_dims(axes = expand_dims_419_axes_0, x = end_step_55)[name = string("expand_dims_419")]; + tensor concat_576_values0_0 = const()[name = string("concat_576_values0_0"), val = tensor([26])]; + int32 concat_576_axis_0 = const()[name = string("concat_576_axis_0"), val = int32(0)]; + bool concat_576_interleave_0 = const()[name = string("concat_576_interleave_0"), val = bool(false)]; + tensor concat_576 = concat(axis = concat_576_axis_0, interleave = concat_576_interleave_0, values = (concat_576_values0_0, expand_dims_416, expand_dims_1, expand_dims_418))[name = string("concat_576")]; + tensor concat_577_values0_0 = const()[name = string("concat_577_values0_0"), val = tensor([0])]; + tensor concat_577_values1_0 = const()[name = string("concat_577_values1_0"), val = tensor([0])]; + tensor concat_577_values3_0 = const()[name = string("concat_577_values3_0"), val = tensor([0])]; + int32 concat_577_axis_0 = const()[name = string("concat_577_axis_0"), val = int32(0)]; + bool concat_577_interleave_0 = const()[name = string("concat_577_interleave_0"), val = bool(false)]; + tensor concat_577 = concat(axis = concat_577_axis_0, interleave = concat_577_interleave_0, values = (concat_577_values0_0, concat_577_values1_0, expand_dims_419, concat_577_values3_0))[name = string("concat_577")]; + tensor k_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = k_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = k_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_27_stride_0, update = linear_209_cast_fp16, x = coreml_update_state_114)[name = string("k_cache1_internal_tensor_assign_27_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_27_cast_fp16, input = k_cache1)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = k_cache1)[name = string("coreml_update_state_116")]; + tensor v_cache1_internal_tensor_assign_27_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_27_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_27_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_576, begin_mask = v_cache1_internal_tensor_assign_27_begin_mask_0, end = concat_577, end_mask = v_cache1_internal_tensor_assign_27_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_27_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_27_stride_0, update = linear_210_cast_fp16, x = coreml_update_state_115)[name = string("v_cache1_internal_tensor_assign_27_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_27_cast_fp16, input = v_cache1)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = v_cache1)[name = string("coreml_update_state_117")]; + int32 concat_582_values0_0 = const()[name = string("concat_582_values0_0"), val = int32(1)]; + int32 concat_582_values2_0 = const()[name = string("concat_582_values2_0"), val = int32(1280)]; + int32 concat_582_axis_0 = const()[name = string("concat_582_axis_0"), val = int32(0)]; + bool concat_582_interleave_0 = const()[name = string("concat_582_interleave_0"), val = bool(false)]; + tensor concat_582 = concat(axis = concat_582_axis_0, interleave = concat_582_interleave_0, values = (concat_582_values0_0, end_step_55, concat_582_values2_0))[name = string("concat_582")]; + tensor var_5722_begin_0 = const()[name = string("op_5722_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5722_end_mask_0 = const()[name = string("op_5722_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5722_cast_fp16 = slice_by_index(begin = var_5722_begin_0, end = concat_582, end_mask = var_5722_end_mask_0, x = k_cache_105_cast_fp16)[name = string("op_5722_cast_fp16")]; + tensor var_5725_begin_0 = const()[name = string("op_5725_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5725_end_mask_0 = const()[name = string("op_5725_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = concat_582, end_mask = var_5725_end_mask_0, x = v_cache_105_cast_fp16)[name = string("op_5725_cast_fp16")]; + tensor concat_584x = const()[name = string("concat_584x"), val = tensor([1, -1, 20, 64])]; + tensor var_5735_cast_fp16 = reshape(shape = concat_584x, x = linear_208_cast_fp16)[name = string("op_5735_cast_fp16")]; + tensor const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_211_cast_fp16 = mul(x = var_5735_cast_fp16, y = const_264_to_fp16)[name = string("q_211_cast_fp16")]; + tensor concat_585x = const()[name = string("concat_585x"), val = tensor([1, -1, 20, 64])]; + tensor var_5742_cast_fp16 = reshape(shape = concat_585x, x = var_5722_cast_fp16)[name = string("op_5742_cast_fp16")]; + tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_265_cast_fp16 = mul(x = var_5742_cast_fp16, y = const_265_to_fp16)[name = string("k_265_cast_fp16")]; + tensor concat_586x = const()[name = string("concat_586x"), val = tensor([1, -1, 20, 64])]; + tensor var_5749_cast_fp16 = reshape(shape = concat_586x, x = var_5725_cast_fp16)[name = string("op_5749_cast_fp16")]; + tensor var_5750 = const()[name = string("op_5750"), val = tensor([0, 2, 1, 3])]; + bool qk_157_transpose_x_0 = const()[name = string("qk_157_transpose_x_0"), val = bool(false)]; + bool qk_157_transpose_y_0 = const()[name = string("qk_157_transpose_y_0"), val = bool(false)]; + tensor transpose_361_perm_0 = const()[name = string("transpose_361_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_362_perm_0 = const()[name = string("transpose_362_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_362 = transpose(perm = transpose_362_perm_0, x = k_265_cast_fp16)[name = string("transpose_430")]; + tensor transpose_361 = transpose(perm = transpose_361_perm_0, x = q_211_cast_fp16)[name = string("transpose_431")]; + tensor qk_157_cast_fp16 = matmul(transpose_x = qk_157_transpose_x_0, transpose_y = qk_157_transpose_y_0, x = transpose_361, y = transpose_362)[name = string("qk_157_cast_fp16")]; + int32 concat_587_values1_0 = const()[name = string("concat_587_values1_0"), val = int32(448)]; + int32 concat_587_axis_0 = const()[name = string("concat_587_axis_0"), val = int32(0)]; + bool concat_587_interleave_0 = const()[name = string("concat_587_interleave_0"), val = bool(false)]; + tensor concat_587 = concat(axis = concat_587_axis_0, interleave = concat_587_interleave_0, values = (gather_314_cast_uint16_to_int32, concat_587_values1_0))[name = string("concat_587")]; + tensor var_5753_begin_0 = const()[name = string("op_5753_begin_0"), val = tensor([0, 0])]; + tensor var_5753_end_mask_0 = const()[name = string("op_5753_end_mask_0"), val = tensor([false, true])]; + tensor var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = concat_587, end_mask = var_5753_end_mask_0, x = mask_to_fp16)[name = string("op_5753_cast_fp16")]; + int32 concat_588_values0_0 = const()[name = string("concat_588_values0_0"), val = int32(0)]; + int32 concat_588_axis_0 = const()[name = string("concat_588_axis_0"), val = int32(0)]; + bool concat_588_interleave_0 = const()[name = string("concat_588_interleave_0"), val = bool(false)]; + tensor concat_588 = concat(axis = concat_588_axis_0, interleave = concat_588_interleave_0, values = (concat_588_values0_0, gather_314_cast_uint16_to_int32))[name = string("concat_588")]; + tensor var_5754_begin_0 = const()[name = string("op_5754_begin_0"), val = tensor([0, 0])]; + tensor var_5754_end_mask_0 = const()[name = string("op_5754_end_mask_0"), val = tensor([true, false])]; + tensor var_5754_cast_fp16 = slice_by_index(begin = var_5754_begin_0, end = concat_588, end_mask = var_5754_end_mask_0, x = var_5753_cast_fp16)[name = string("op_5754_cast_fp16")]; + tensor qk_159_cast_fp16 = add(x = qk_157_cast_fp16, y = var_5754_cast_fp16)[name = string("qk_159_cast_fp16")]; + tensor var_5757_cast_fp16 = softmax(axis = var_5666, x = qk_159_cast_fp16)[name = string("op_5757_cast_fp16")]; + bool var_5759_transpose_x_0 = const()[name = string("op_5759_transpose_x_0"), val = bool(false)]; + bool var_5759_transpose_y_0 = const()[name = string("op_5759_transpose_y_0"), val = bool(false)]; + tensor v_265_cast_fp16 = transpose(perm = var_5750, x = var_5749_cast_fp16)[name = string("transpose_432")]; + tensor var_5759_cast_fp16 = matmul(transpose_x = var_5759_transpose_x_0, transpose_y = var_5759_transpose_y_0, x = var_5757_cast_fp16, y = v_265_cast_fp16)[name = string("op_5759_cast_fp16")]; + tensor var_5760 = const()[name = string("op_5760"), val = tensor([0, 2, 1, 3])]; + tensor concat_589x = const()[name = string("concat_589x"), val = tensor([1, -1, 1280])]; + tensor var_5761_cast_fp16 = transpose(perm = var_5760, x = var_5759_cast_fp16)[name = string("transpose_429")]; + tensor x_475_cast_fp16 = reshape(shape = concat_589x, x = var_5761_cast_fp16)[name = string("x_475_cast_fp16")]; + tensor var_5765_to_fp16 = const()[name = string("op_5765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1341864384)))]; + tensor var_5766_to_fp16 = const()[name = string("op_5766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345141248)))]; + tensor linear_211_cast_fp16 = linear(bias = var_5766_to_fp16, weight = var_5765_to_fp16, x = x_475_cast_fp16)[name = string("linear_211_cast_fp16")]; + tensor x_477_cast_fp16 = add(x = x_471_cast_fp16, y = linear_211_cast_fp16)[name = string("x_477_cast_fp16")]; + tensor var_5773_axes_0 = const()[name = string("op_5773_axes_0"), val = tensor([-1])]; + tensor blocks_26_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345143872)))]; + tensor blocks_26_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345146496)))]; + tensor var_5773_cast_fp16 = layer_norm(axes = var_5773_axes_0, beta = blocks_26_cross_attn_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_cross_attn_ln_weight_to_fp16, x = x_477_cast_fp16)[name = string("op_5773_cast_fp16")]; + tensor var_5782_to_fp16 = const()[name = string("op_5782_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1345149120)))]; + tensor var_5783_to_fp16 = const()[name = string("op_5783_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348425984)))]; + tensor linear_212_cast_fp16 = linear(bias = var_5783_to_fp16, weight = var_5782_to_fp16, x = var_5773_cast_fp16)[name = string("linear_212_cast_fp16")]; + tensor concat_590 = const()[name = string("concat_590"), val = tensor([0, 0, 0])]; + tensor concat_591 = const()[name = string("concat_591"), val = tensor([0, 1500, 0])]; + tensor k_267_internal_tensor_assign_1_stride_0 = const()[name = string("k_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_590, begin_mask = k_267_internal_tensor_assign_1_begin_mask_0, end = concat_591, end_mask = k_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_267_internal_tensor_assign_1_squeeze_mask_0, stride = k_267_internal_tensor_assign_1_stride_0, update = k_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("k_267_internal_tensor_assign_1_cast_fp16")]; + tensor concat_592 = const()[name = string("concat_592"), val = tensor([0, 0, 0])]; + tensor concat_593 = const()[name = string("concat_593"), val = tensor([0, 1500, 0])]; + tensor v_267_internal_tensor_assign_1_stride_0 = const()[name = string("v_267_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_267_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_267_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_267_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_267_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_267_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_592, begin_mask = v_267_internal_tensor_assign_1_begin_mask_0, end = concat_593, end_mask = v_267_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_267_internal_tensor_assign_1_squeeze_mask_0, stride = v_267_internal_tensor_assign_1_stride_0, update = v_cache_107_cast_fp16, x = k_7_to_fp16)[name = string("v_267_internal_tensor_assign_1_cast_fp16")]; + tensor concat_594x = const()[name = string("concat_594x"), val = tensor([1, -1, 20, 64])]; + tensor var_5803_cast_fp16 = reshape(shape = concat_594x, x = linear_212_cast_fp16)[name = string("op_5803_cast_fp16")]; + tensor const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_215_cast_fp16 = mul(x = var_5803_cast_fp16, y = const_266_to_fp16)[name = string("q_215_cast_fp16")]; + tensor var_5809 = const()[name = string("op_5809"), val = tensor([1, 1500, 20, -1])]; + tensor var_5810_cast_fp16 = reshape(shape = var_5809, x = k_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5810_cast_fp16")]; + tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_269_cast_fp16 = mul(x = var_5810_cast_fp16, y = const_267_to_fp16)[name = string("k_269_cast_fp16")]; + tensor var_5816 = const()[name = string("op_5816"), val = tensor([1, 1500, 20, -1])]; + tensor var_5817_cast_fp16 = reshape(shape = var_5816, x = v_267_internal_tensor_assign_1_cast_fp16)[name = string("op_5817_cast_fp16")]; + tensor var_5818 = const()[name = string("op_5818"), val = tensor([0, 2, 1, 3])]; + bool qk_161_transpose_x_0 = const()[name = string("qk_161_transpose_x_0"), val = bool(false)]; + bool qk_161_transpose_y_0 = const()[name = string("qk_161_transpose_y_0"), val = bool(false)]; + tensor transpose_363_perm_0 = const()[name = string("transpose_363_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_364_perm_0 = const()[name = string("transpose_364_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_364 = transpose(perm = transpose_364_perm_0, x = k_269_cast_fp16)[name = string("transpose_426")]; + tensor transpose_363 = transpose(perm = transpose_363_perm_0, x = q_215_cast_fp16)[name = string("transpose_427")]; + tensor qk_161_cast_fp16 = matmul(transpose_x = qk_161_transpose_x_0, transpose_y = qk_161_transpose_y_0, x = transpose_363, y = transpose_364)[name = string("qk_161_cast_fp16")]; + tensor var_5822_cast_fp16 = softmax(axis = var_5666, x = qk_161_cast_fp16)[name = string("op_5822_cast_fp16")]; + bool var_5824_transpose_x_0 = const()[name = string("op_5824_transpose_x_0"), val = bool(false)]; + bool var_5824_transpose_y_0 = const()[name = string("op_5824_transpose_y_0"), val = bool(false)]; + tensor v_269_cast_fp16 = transpose(perm = var_5818, x = var_5817_cast_fp16)[name = string("transpose_428")]; + tensor var_5824_cast_fp16 = matmul(transpose_x = var_5824_transpose_x_0, transpose_y = var_5824_transpose_y_0, x = var_5822_cast_fp16, y = v_269_cast_fp16)[name = string("op_5824_cast_fp16")]; + tensor var_5825 = const()[name = string("op_5825"), val = tensor([0, 2, 1, 3])]; + tensor concat_595x = const()[name = string("concat_595x"), val = tensor([1, -1, 1280])]; + tensor var_5826_cast_fp16 = transpose(perm = var_5825, x = var_5824_cast_fp16)[name = string("transpose_425")]; + tensor x_481_cast_fp16 = reshape(shape = concat_595x, x = var_5826_cast_fp16)[name = string("x_481_cast_fp16")]; + tensor var_5830_to_fp16 = const()[name = string("op_5830_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1348428608)))]; + tensor var_5831_to_fp16 = const()[name = string("op_5831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351705472)))]; + tensor linear_213_cast_fp16 = linear(bias = var_5831_to_fp16, weight = var_5830_to_fp16, x = x_481_cast_fp16)[name = string("linear_213_cast_fp16")]; + tensor x_483_cast_fp16 = add(x = x_477_cast_fp16, y = linear_213_cast_fp16)[name = string("x_483_cast_fp16")]; + tensor var_5838_axes_0 = const()[name = string("op_5838_axes_0"), val = tensor([-1])]; + tensor blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351708096)))]; + tensor blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351710720)))]; + tensor var_5838_cast_fp16 = layer_norm(axes = var_5838_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_5672_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_483_cast_fp16)[name = string("op_5838_cast_fp16")]; + tensor var_5847_to_fp16 = const()[name = string("op_5847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1351713344)))]; + tensor var_5848_to_fp16 = const()[name = string("op_5848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364820608)))]; + tensor linear_214_cast_fp16 = linear(bias = var_5848_to_fp16, weight = var_5847_to_fp16, x = var_5838_cast_fp16)[name = string("linear_214_cast_fp16")]; + string x_487_mode_0 = const()[name = string("x_487_mode_0"), val = string("EXACT")]; + tensor x_487_cast_fp16 = gelu(mode = x_487_mode_0, x = linear_214_cast_fp16)[name = string("x_487_cast_fp16")]; + tensor var_5853_to_fp16 = const()[name = string("op_5853_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1364830912)))]; + tensor var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377938176)))]; + tensor linear_215_cast_fp16 = linear(bias = var_5854_to_fp16, weight = var_5853_to_fp16, x = x_487_cast_fp16)[name = string("linear_215_cast_fp16")]; + tensor x_489_cast_fp16 = add(x = x_483_cast_fp16, y = linear_215_cast_fp16)[name = string("x_489_cast_fp16")]; + tensor k_cache_109_begin_0 = const()[name = string("k_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor k_cache_109_end_0 = const()[name = string("k_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; + tensor k_cache_109_end_mask_0 = const()[name = string("k_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_109_squeeze_mask_0 = const()[name = string("k_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_109_cast_fp16 = slice_by_index(begin = k_cache_109_begin_0, end = k_cache_109_end_0, end_mask = k_cache_109_end_mask_0, squeeze_mask = k_cache_109_squeeze_mask_0, x = coreml_update_state_116)[name = string("k_cache_109_cast_fp16")]; + tensor v_cache_109_begin_0 = const()[name = string("v_cache_109_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor v_cache_109_end_0 = const()[name = string("v_cache_109_end_0"), val = tensor([28, 1, 448, 1280])]; + tensor v_cache_109_end_mask_0 = const()[name = string("v_cache_109_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_109_squeeze_mask_0 = const()[name = string("v_cache_109_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_109_cast_fp16 = slice_by_index(begin = v_cache_109_begin_0, end = v_cache_109_end_0, end_mask = v_cache_109_end_mask_0, squeeze_mask = v_cache_109_squeeze_mask_0, x = coreml_update_state_117)[name = string("v_cache_109_cast_fp16")]; + tensor k_cache_111_begin_0 = const()[name = string("k_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor k_cache_111_end_0 = const()[name = string("k_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; + tensor k_cache_111_end_mask_0 = const()[name = string("k_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_111_squeeze_mask_0 = const()[name = string("k_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_111_cast_fp16 = slice_by_index(begin = k_cache_111_begin_0, end = k_cache_111_end_0, end_mask = k_cache_111_end_mask_0, squeeze_mask = k_cache_111_squeeze_mask_0, x = read_state_2)[name = string("k_cache_111_cast_fp16")]; + tensor v_cache_111_begin_0 = const()[name = string("v_cache_111_begin_0"), val = tensor([27, 0, 0, 0])]; + tensor v_cache_111_end_0 = const()[name = string("v_cache_111_end_0"), val = tensor([28, 1, 1500, 1280])]; + tensor v_cache_111_end_mask_0 = const()[name = string("v_cache_111_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_111_squeeze_mask_0 = const()[name = string("v_cache_111_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_111_cast_fp16 = slice_by_index(begin = v_cache_111_begin_0, end = v_cache_111_end_0, end_mask = v_cache_111_end_mask_0, squeeze_mask = v_cache_111_squeeze_mask_0, x = read_state_3)[name = string("v_cache_111_cast_fp16")]; + int32 var_5877 = const()[name = string("op_5877"), val = int32(-1)]; + tensor var_5895_axes_0 = const()[name = string("op_5895_axes_0"), val = tensor([-1])]; + tensor blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377940800)))]; + tensor blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377943424)))]; + fp16 var_5883_to_fp16 = const()[name = string("op_5883_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5895_cast_fp16 = layer_norm(axes = var_5895_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_489_cast_fp16)[name = string("op_5895_cast_fp16")]; + tensor var_5906_to_fp16 = const()[name = string("op_5906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1377946048)))]; + tensor var_5907_to_fp16 = const()[name = string("op_5907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381222912)))]; + tensor linear_216_cast_fp16 = linear(bias = var_5907_to_fp16, weight = var_5906_to_fp16, x = var_5895_cast_fp16)[name = string("linear_216_cast_fp16")]; + tensor var_5910_to_fp16 = const()[name = string("op_5910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1381225536)))]; + tensor linear_217_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5910_to_fp16, x = var_5895_cast_fp16)[name = string("linear_217_cast_fp16")]; + tensor var_5914_to_fp16 = const()[name = string("op_5914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1384502400)))]; + tensor var_5915_to_fp16 = const()[name = string("op_5915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387779264)))]; + tensor linear_218_cast_fp16 = linear(bias = var_5915_to_fp16, weight = var_5914_to_fp16, x = var_5895_cast_fp16)[name = string("linear_218_cast_fp16")]; + tensor var_5917_shape_cast_fp16 = shape(x = linear_216_cast_fp16)[name = string("op_5917_shape_cast_fp16")]; + int32 gather_326_axis_0 = const()[name = string("gather_326_axis_0"), val = int32(0)]; + int32 gather_326_batch_dims_0 = const()[name = string("gather_326_batch_dims_0"), val = int32(0)]; + bool gather_326_validate_indices_0 = const()[name = string("gather_326_validate_indices_0"), val = bool(false)]; + string var_5917_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5917_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_326_to_uint16 = const()[name = string("select_326_to_uint16"), val = uint16(1)]; + tensor var_5917_shape_cast_fp16_to_uint16 = cast(dtype = var_5917_shape_cast_fp16_to_uint16_dtype_0, x = var_5917_shape_cast_fp16)[name = string("cast_336")]; + uint16 gather_326_cast_uint16 = gather(axis = gather_326_axis_0, batch_dims = gather_326_batch_dims_0, indices = select_326_to_uint16, validate_indices = gather_326_validate_indices_0, x = var_5917_shape_cast_fp16_to_uint16)[name = string("gather_326_cast_uint16")]; + string gather_326_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_326_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_326_cast_uint16_to_int32 = cast(dtype = gather_326_cast_uint16_to_int32_dtype_0, x = gather_326_cast_uint16)[name = string("cast_335")]; + int32 end_step_57 = add(x = offset, y = gather_326_cast_uint16_to_int32)[name = string("end_step_57")]; + tensor expand_dims_432 = const()[name = string("expand_dims_432"), val = tensor([0])]; + tensor expand_dims_434 = const()[name = string("expand_dims_434"), val = tensor([0])]; + tensor expand_dims_435_axes_0 = const()[name = string("expand_dims_435_axes_0"), val = tensor([0])]; + tensor expand_dims_435 = expand_dims(axes = expand_dims_435_axes_0, x = end_step_57)[name = string("expand_dims_435")]; + tensor concat_598_values0_0 = const()[name = string("concat_598_values0_0"), val = tensor([27])]; + int32 concat_598_axis_0 = const()[name = string("concat_598_axis_0"), val = int32(0)]; + bool concat_598_interleave_0 = const()[name = string("concat_598_interleave_0"), val = bool(false)]; + tensor concat_598 = concat(axis = concat_598_axis_0, interleave = concat_598_interleave_0, values = (concat_598_values0_0, expand_dims_432, expand_dims_1, expand_dims_434))[name = string("concat_598")]; + tensor concat_599_values0_0 = const()[name = string("concat_599_values0_0"), val = tensor([0])]; + tensor concat_599_values1_0 = const()[name = string("concat_599_values1_0"), val = tensor([0])]; + tensor concat_599_values3_0 = const()[name = string("concat_599_values3_0"), val = tensor([0])]; + int32 concat_599_axis_0 = const()[name = string("concat_599_axis_0"), val = int32(0)]; + bool concat_599_interleave_0 = const()[name = string("concat_599_interleave_0"), val = bool(false)]; + tensor concat_599 = concat(axis = concat_599_axis_0, interleave = concat_599_interleave_0, values = (concat_599_values0_0, concat_599_values1_0, expand_dims_435, concat_599_values3_0))[name = string("concat_599")]; + tensor k_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = k_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = k_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_28_stride_0, update = linear_217_cast_fp16, x = coreml_update_state_116)[name = string("k_cache1_internal_tensor_assign_28_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_28_cast_fp16, input = k_cache1)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = k_cache1)[name = string("coreml_update_state_118")]; + tensor v_cache1_internal_tensor_assign_28_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_28_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_28_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_598, begin_mask = v_cache1_internal_tensor_assign_28_begin_mask_0, end = concat_599, end_mask = v_cache1_internal_tensor_assign_28_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_28_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_28_stride_0, update = linear_218_cast_fp16, x = coreml_update_state_117)[name = string("v_cache1_internal_tensor_assign_28_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_28_cast_fp16, input = v_cache1)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = v_cache1)[name = string("coreml_update_state_119")]; + int32 concat_604_values0_0 = const()[name = string("concat_604_values0_0"), val = int32(1)]; + int32 concat_604_values2_0 = const()[name = string("concat_604_values2_0"), val = int32(1280)]; + int32 concat_604_axis_0 = const()[name = string("concat_604_axis_0"), val = int32(0)]; + bool concat_604_interleave_0 = const()[name = string("concat_604_interleave_0"), val = bool(false)]; + tensor concat_604 = concat(axis = concat_604_axis_0, interleave = concat_604_interleave_0, values = (concat_604_values0_0, end_step_57, concat_604_values2_0))[name = string("concat_604")]; + tensor var_5933_begin_0 = const()[name = string("op_5933_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5933_end_mask_0 = const()[name = string("op_5933_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = concat_604, end_mask = var_5933_end_mask_0, x = k_cache_109_cast_fp16)[name = string("op_5933_cast_fp16")]; + tensor var_5936_begin_0 = const()[name = string("op_5936_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5936_end_mask_0 = const()[name = string("op_5936_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5936_cast_fp16 = slice_by_index(begin = var_5936_begin_0, end = concat_604, end_mask = var_5936_end_mask_0, x = v_cache_109_cast_fp16)[name = string("op_5936_cast_fp16")]; + tensor concat_606x = const()[name = string("concat_606x"), val = tensor([1, -1, 20, 64])]; + tensor var_5946_cast_fp16 = reshape(shape = concat_606x, x = linear_216_cast_fp16)[name = string("op_5946_cast_fp16")]; + tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_219_cast_fp16 = mul(x = var_5946_cast_fp16, y = const_268_to_fp16)[name = string("q_219_cast_fp16")]; + tensor concat_607x = const()[name = string("concat_607x"), val = tensor([1, -1, 20, 64])]; + tensor var_5953_cast_fp16 = reshape(shape = concat_607x, x = var_5933_cast_fp16)[name = string("op_5953_cast_fp16")]; + tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_275_cast_fp16 = mul(x = var_5953_cast_fp16, y = const_269_to_fp16)[name = string("k_275_cast_fp16")]; + tensor concat_608x = const()[name = string("concat_608x"), val = tensor([1, -1, 20, 64])]; + tensor var_5960_cast_fp16 = reshape(shape = concat_608x, x = var_5936_cast_fp16)[name = string("op_5960_cast_fp16")]; + tensor var_5961 = const()[name = string("op_5961"), val = tensor([0, 2, 1, 3])]; + bool qk_163_transpose_x_0 = const()[name = string("qk_163_transpose_x_0"), val = bool(false)]; + bool qk_163_transpose_y_0 = const()[name = string("qk_163_transpose_y_0"), val = bool(false)]; + tensor transpose_365_perm_0 = const()[name = string("transpose_365_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_366_perm_0 = const()[name = string("transpose_366_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_366 = transpose(perm = transpose_366_perm_0, x = k_275_cast_fp16)[name = string("transpose_422")]; + tensor transpose_365 = transpose(perm = transpose_365_perm_0, x = q_219_cast_fp16)[name = string("transpose_423")]; + tensor qk_163_cast_fp16 = matmul(transpose_x = qk_163_transpose_x_0, transpose_y = qk_163_transpose_y_0, x = transpose_365, y = transpose_366)[name = string("qk_163_cast_fp16")]; + int32 concat_609_values1_0 = const()[name = string("concat_609_values1_0"), val = int32(448)]; + int32 concat_609_axis_0 = const()[name = string("concat_609_axis_0"), val = int32(0)]; + bool concat_609_interleave_0 = const()[name = string("concat_609_interleave_0"), val = bool(false)]; + tensor concat_609 = concat(axis = concat_609_axis_0, interleave = concat_609_interleave_0, values = (gather_326_cast_uint16_to_int32, concat_609_values1_0))[name = string("concat_609")]; + tensor var_5964_begin_0 = const()[name = string("op_5964_begin_0"), val = tensor([0, 0])]; + tensor var_5964_end_mask_0 = const()[name = string("op_5964_end_mask_0"), val = tensor([false, true])]; + tensor var_5964_cast_fp16 = slice_by_index(begin = var_5964_begin_0, end = concat_609, end_mask = var_5964_end_mask_0, x = mask_to_fp16)[name = string("op_5964_cast_fp16")]; + int32 concat_610_values0_0 = const()[name = string("concat_610_values0_0"), val = int32(0)]; + int32 concat_610_axis_0 = const()[name = string("concat_610_axis_0"), val = int32(0)]; + bool concat_610_interleave_0 = const()[name = string("concat_610_interleave_0"), val = bool(false)]; + tensor concat_610 = concat(axis = concat_610_axis_0, interleave = concat_610_interleave_0, values = (concat_610_values0_0, gather_326_cast_uint16_to_int32))[name = string("concat_610")]; + tensor var_5965_begin_0 = const()[name = string("op_5965_begin_0"), val = tensor([0, 0])]; + tensor var_5965_end_mask_0 = const()[name = string("op_5965_end_mask_0"), val = tensor([true, false])]; + tensor var_5965_cast_fp16 = slice_by_index(begin = var_5965_begin_0, end = concat_610, end_mask = var_5965_end_mask_0, x = var_5964_cast_fp16)[name = string("op_5965_cast_fp16")]; + tensor qk_165_cast_fp16 = add(x = qk_163_cast_fp16, y = var_5965_cast_fp16)[name = string("qk_165_cast_fp16")]; + tensor var_5968_cast_fp16 = softmax(axis = var_5877, x = qk_165_cast_fp16)[name = string("op_5968_cast_fp16")]; + bool var_5970_transpose_x_0 = const()[name = string("op_5970_transpose_x_0"), val = bool(false)]; + bool var_5970_transpose_y_0 = const()[name = string("op_5970_transpose_y_0"), val = bool(false)]; + tensor v_275_cast_fp16 = transpose(perm = var_5961, x = var_5960_cast_fp16)[name = string("transpose_424")]; + tensor var_5970_cast_fp16 = matmul(transpose_x = var_5970_transpose_x_0, transpose_y = var_5970_transpose_y_0, x = var_5968_cast_fp16, y = v_275_cast_fp16)[name = string("op_5970_cast_fp16")]; + tensor var_5971 = const()[name = string("op_5971"), val = tensor([0, 2, 1, 3])]; + tensor concat_611x = const()[name = string("concat_611x"), val = tensor([1, -1, 1280])]; + tensor var_5972_cast_fp16 = transpose(perm = var_5971, x = var_5970_cast_fp16)[name = string("transpose_421")]; + tensor x_493_cast_fp16 = reshape(shape = concat_611x, x = var_5972_cast_fp16)[name = string("x_493_cast_fp16")]; + tensor var_5976_to_fp16 = const()[name = string("op_5976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1387781888)))]; + tensor var_5977_to_fp16 = const()[name = string("op_5977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391058752)))]; + tensor linear_219_cast_fp16 = linear(bias = var_5977_to_fp16, weight = var_5976_to_fp16, x = x_493_cast_fp16)[name = string("linear_219_cast_fp16")]; + tensor x_495_cast_fp16 = add(x = x_489_cast_fp16, y = linear_219_cast_fp16)[name = string("x_495_cast_fp16")]; + tensor var_5984_axes_0 = const()[name = string("op_5984_axes_0"), val = tensor([-1])]; + tensor blocks_27_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391061376)))]; + tensor blocks_27_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391064000)))]; + tensor var_5984_cast_fp16 = layer_norm(axes = var_5984_axes_0, beta = blocks_27_cross_attn_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_cross_attn_ln_weight_to_fp16, x = x_495_cast_fp16)[name = string("op_5984_cast_fp16")]; + tensor var_5993_to_fp16 = const()[name = string("op_5993_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1391066624)))]; + tensor var_5994_to_fp16 = const()[name = string("op_5994_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394343488)))]; + tensor linear_220_cast_fp16 = linear(bias = var_5994_to_fp16, weight = var_5993_to_fp16, x = var_5984_cast_fp16)[name = string("linear_220_cast_fp16")]; + tensor concat_612 = const()[name = string("concat_612"), val = tensor([0, 0, 0])]; + tensor concat_613 = const()[name = string("concat_613"), val = tensor([0, 1500, 0])]; + tensor k_277_internal_tensor_assign_1_stride_0 = const()[name = string("k_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_612, begin_mask = k_277_internal_tensor_assign_1_begin_mask_0, end = concat_613, end_mask = k_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_277_internal_tensor_assign_1_squeeze_mask_0, stride = k_277_internal_tensor_assign_1_stride_0, update = k_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("k_277_internal_tensor_assign_1_cast_fp16")]; + tensor concat_614 = const()[name = string("concat_614"), val = tensor([0, 0, 0])]; + tensor concat_615 = const()[name = string("concat_615"), val = tensor([0, 1500, 0])]; + tensor v_277_internal_tensor_assign_1_stride_0 = const()[name = string("v_277_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_277_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_277_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_277_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_277_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_277_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_614, begin_mask = v_277_internal_tensor_assign_1_begin_mask_0, end = concat_615, end_mask = v_277_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_277_internal_tensor_assign_1_squeeze_mask_0, stride = v_277_internal_tensor_assign_1_stride_0, update = v_cache_111_cast_fp16, x = k_7_to_fp16)[name = string("v_277_internal_tensor_assign_1_cast_fp16")]; + tensor concat_616x = const()[name = string("concat_616x"), val = tensor([1, -1, 20, 64])]; + tensor var_6014_cast_fp16 = reshape(shape = concat_616x, x = linear_220_cast_fp16)[name = string("op_6014_cast_fp16")]; + tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_223_cast_fp16 = mul(x = var_6014_cast_fp16, y = const_270_to_fp16)[name = string("q_223_cast_fp16")]; + tensor var_6020 = const()[name = string("op_6020"), val = tensor([1, 1500, 20, -1])]; + tensor var_6021_cast_fp16 = reshape(shape = var_6020, x = k_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6021_cast_fp16")]; + tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_279_cast_fp16 = mul(x = var_6021_cast_fp16, y = const_271_to_fp16)[name = string("k_279_cast_fp16")]; + tensor var_6027 = const()[name = string("op_6027"), val = tensor([1, 1500, 20, -1])]; + tensor var_6028_cast_fp16 = reshape(shape = var_6027, x = v_277_internal_tensor_assign_1_cast_fp16)[name = string("op_6028_cast_fp16")]; + tensor var_6029 = const()[name = string("op_6029"), val = tensor([0, 2, 1, 3])]; + bool qk_167_transpose_x_0 = const()[name = string("qk_167_transpose_x_0"), val = bool(false)]; + bool qk_167_transpose_y_0 = const()[name = string("qk_167_transpose_y_0"), val = bool(false)]; + tensor transpose_367_perm_0 = const()[name = string("transpose_367_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_368_perm_0 = const()[name = string("transpose_368_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_368 = transpose(perm = transpose_368_perm_0, x = k_279_cast_fp16)[name = string("transpose_418")]; + tensor transpose_367 = transpose(perm = transpose_367_perm_0, x = q_223_cast_fp16)[name = string("transpose_419")]; + tensor qk_167_cast_fp16 = matmul(transpose_x = qk_167_transpose_x_0, transpose_y = qk_167_transpose_y_0, x = transpose_367, y = transpose_368)[name = string("qk_167_cast_fp16")]; + tensor var_6033_cast_fp16 = softmax(axis = var_5877, x = qk_167_cast_fp16)[name = string("op_6033_cast_fp16")]; + bool var_6035_transpose_x_0 = const()[name = string("op_6035_transpose_x_0"), val = bool(false)]; + bool var_6035_transpose_y_0 = const()[name = string("op_6035_transpose_y_0"), val = bool(false)]; + tensor v_279_cast_fp16 = transpose(perm = var_6029, x = var_6028_cast_fp16)[name = string("transpose_420")]; + tensor var_6035_cast_fp16 = matmul(transpose_x = var_6035_transpose_x_0, transpose_y = var_6035_transpose_y_0, x = var_6033_cast_fp16, y = v_279_cast_fp16)[name = string("op_6035_cast_fp16")]; + tensor var_6036 = const()[name = string("op_6036"), val = tensor([0, 2, 1, 3])]; + tensor concat_617x = const()[name = string("concat_617x"), val = tensor([1, -1, 1280])]; + tensor var_6037_cast_fp16 = transpose(perm = var_6036, x = var_6035_cast_fp16)[name = string("transpose_417")]; + tensor x_499_cast_fp16 = reshape(shape = concat_617x, x = var_6037_cast_fp16)[name = string("x_499_cast_fp16")]; + tensor var_6041_to_fp16 = const()[name = string("op_6041_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1394346112)))]; + tensor var_6042_to_fp16 = const()[name = string("op_6042_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397622976)))]; + tensor linear_221_cast_fp16 = linear(bias = var_6042_to_fp16, weight = var_6041_to_fp16, x = x_499_cast_fp16)[name = string("linear_221_cast_fp16")]; + tensor x_501_cast_fp16 = add(x = x_495_cast_fp16, y = linear_221_cast_fp16)[name = string("x_501_cast_fp16")]; + tensor var_6049_axes_0 = const()[name = string("op_6049_axes_0"), val = tensor([-1])]; + tensor blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397625600)))]; + tensor blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397628224)))]; + tensor var_6049_cast_fp16 = layer_norm(axes = var_6049_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_5883_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_501_cast_fp16)[name = string("op_6049_cast_fp16")]; + tensor var_6058_to_fp16 = const()[name = string("op_6058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1397630848)))]; + tensor var_6059_to_fp16 = const()[name = string("op_6059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410738112)))]; + tensor linear_222_cast_fp16 = linear(bias = var_6059_to_fp16, weight = var_6058_to_fp16, x = var_6049_cast_fp16)[name = string("linear_222_cast_fp16")]; + string x_505_mode_0 = const()[name = string("x_505_mode_0"), val = string("EXACT")]; + tensor x_505_cast_fp16 = gelu(mode = x_505_mode_0, x = linear_222_cast_fp16)[name = string("x_505_cast_fp16")]; + tensor var_6064_to_fp16 = const()[name = string("op_6064_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1410748416)))]; + tensor var_6065_to_fp16 = const()[name = string("op_6065_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423855680)))]; + tensor linear_223_cast_fp16 = linear(bias = var_6065_to_fp16, weight = var_6064_to_fp16, x = x_505_cast_fp16)[name = string("linear_223_cast_fp16")]; + tensor x_507_cast_fp16 = add(x = x_501_cast_fp16, y = linear_223_cast_fp16)[name = string("x_507_cast_fp16")]; + tensor k_cache_113_begin_0 = const()[name = string("k_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor k_cache_113_end_0 = const()[name = string("k_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; + tensor k_cache_113_end_mask_0 = const()[name = string("k_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_113_squeeze_mask_0 = const()[name = string("k_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_113_cast_fp16 = slice_by_index(begin = k_cache_113_begin_0, end = k_cache_113_end_0, end_mask = k_cache_113_end_mask_0, squeeze_mask = k_cache_113_squeeze_mask_0, x = coreml_update_state_118)[name = string("k_cache_113_cast_fp16")]; + tensor v_cache_113_begin_0 = const()[name = string("v_cache_113_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor v_cache_113_end_0 = const()[name = string("v_cache_113_end_0"), val = tensor([29, 1, 448, 1280])]; + tensor v_cache_113_end_mask_0 = const()[name = string("v_cache_113_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_113_squeeze_mask_0 = const()[name = string("v_cache_113_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_113_cast_fp16 = slice_by_index(begin = v_cache_113_begin_0, end = v_cache_113_end_0, end_mask = v_cache_113_end_mask_0, squeeze_mask = v_cache_113_squeeze_mask_0, x = coreml_update_state_119)[name = string("v_cache_113_cast_fp16")]; + tensor k_cache_115_begin_0 = const()[name = string("k_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor k_cache_115_end_0 = const()[name = string("k_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; + tensor k_cache_115_end_mask_0 = const()[name = string("k_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_115_squeeze_mask_0 = const()[name = string("k_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_115_cast_fp16 = slice_by_index(begin = k_cache_115_begin_0, end = k_cache_115_end_0, end_mask = k_cache_115_end_mask_0, squeeze_mask = k_cache_115_squeeze_mask_0, x = read_state_2)[name = string("k_cache_115_cast_fp16")]; + tensor v_cache_115_begin_0 = const()[name = string("v_cache_115_begin_0"), val = tensor([28, 0, 0, 0])]; + tensor v_cache_115_end_0 = const()[name = string("v_cache_115_end_0"), val = tensor([29, 1, 1500, 1280])]; + tensor v_cache_115_end_mask_0 = const()[name = string("v_cache_115_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_115_squeeze_mask_0 = const()[name = string("v_cache_115_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_115_cast_fp16 = slice_by_index(begin = v_cache_115_begin_0, end = v_cache_115_end_0, end_mask = v_cache_115_end_mask_0, squeeze_mask = v_cache_115_squeeze_mask_0, x = read_state_3)[name = string("v_cache_115_cast_fp16")]; + int32 var_6088 = const()[name = string("op_6088"), val = int32(-1)]; + tensor var_6106_axes_0 = const()[name = string("op_6106_axes_0"), val = tensor([-1])]; + tensor blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423858304)))]; + tensor blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423860928)))]; + fp16 var_6094_to_fp16 = const()[name = string("op_6094_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6106_cast_fp16 = layer_norm(axes = var_6106_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_507_cast_fp16)[name = string("op_6106_cast_fp16")]; + tensor var_6117_to_fp16 = const()[name = string("op_6117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1423863552)))]; + tensor var_6118_to_fp16 = const()[name = string("op_6118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427140416)))]; + tensor linear_224_cast_fp16 = linear(bias = var_6118_to_fp16, weight = var_6117_to_fp16, x = var_6106_cast_fp16)[name = string("linear_224_cast_fp16")]; + tensor var_6121_to_fp16 = const()[name = string("op_6121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1427143040)))]; + tensor linear_225_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6121_to_fp16, x = var_6106_cast_fp16)[name = string("linear_225_cast_fp16")]; + tensor var_6125_to_fp16 = const()[name = string("op_6125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1430419904)))]; + tensor var_6126_to_fp16 = const()[name = string("op_6126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433696768)))]; + tensor linear_226_cast_fp16 = linear(bias = var_6126_to_fp16, weight = var_6125_to_fp16, x = var_6106_cast_fp16)[name = string("linear_226_cast_fp16")]; + tensor var_6128_shape_cast_fp16 = shape(x = linear_224_cast_fp16)[name = string("op_6128_shape_cast_fp16")]; + int32 gather_338_axis_0 = const()[name = string("gather_338_axis_0"), val = int32(0)]; + int32 gather_338_batch_dims_0 = const()[name = string("gather_338_batch_dims_0"), val = int32(0)]; + bool gather_338_validate_indices_0 = const()[name = string("gather_338_validate_indices_0"), val = bool(false)]; + string var_6128_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6128_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_338_to_uint16 = const()[name = string("select_338_to_uint16"), val = uint16(1)]; + tensor var_6128_shape_cast_fp16_to_uint16 = cast(dtype = var_6128_shape_cast_fp16_to_uint16_dtype_0, x = var_6128_shape_cast_fp16)[name = string("cast_334")]; + uint16 gather_338_cast_uint16 = gather(axis = gather_338_axis_0, batch_dims = gather_338_batch_dims_0, indices = select_338_to_uint16, validate_indices = gather_338_validate_indices_0, x = var_6128_shape_cast_fp16_to_uint16)[name = string("gather_338_cast_uint16")]; + string gather_338_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_338_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_338_cast_uint16_to_int32 = cast(dtype = gather_338_cast_uint16_to_int32_dtype_0, x = gather_338_cast_uint16)[name = string("cast_333")]; + int32 end_step_59 = add(x = offset, y = gather_338_cast_uint16_to_int32)[name = string("end_step_59")]; + tensor expand_dims_448 = const()[name = string("expand_dims_448"), val = tensor([0])]; + tensor expand_dims_450 = const()[name = string("expand_dims_450"), val = tensor([0])]; + tensor expand_dims_451_axes_0 = const()[name = string("expand_dims_451_axes_0"), val = tensor([0])]; + tensor expand_dims_451 = expand_dims(axes = expand_dims_451_axes_0, x = end_step_59)[name = string("expand_dims_451")]; + tensor concat_620_values0_0 = const()[name = string("concat_620_values0_0"), val = tensor([28])]; + int32 concat_620_axis_0 = const()[name = string("concat_620_axis_0"), val = int32(0)]; + bool concat_620_interleave_0 = const()[name = string("concat_620_interleave_0"), val = bool(false)]; + tensor concat_620 = concat(axis = concat_620_axis_0, interleave = concat_620_interleave_0, values = (concat_620_values0_0, expand_dims_448, expand_dims_1, expand_dims_450))[name = string("concat_620")]; + tensor concat_621_values0_0 = const()[name = string("concat_621_values0_0"), val = tensor([0])]; + tensor concat_621_values1_0 = const()[name = string("concat_621_values1_0"), val = tensor([0])]; + tensor concat_621_values3_0 = const()[name = string("concat_621_values3_0"), val = tensor([0])]; + int32 concat_621_axis_0 = const()[name = string("concat_621_axis_0"), val = int32(0)]; + bool concat_621_interleave_0 = const()[name = string("concat_621_interleave_0"), val = bool(false)]; + tensor concat_621 = concat(axis = concat_621_axis_0, interleave = concat_621_interleave_0, values = (concat_621_values0_0, concat_621_values1_0, expand_dims_451, concat_621_values3_0))[name = string("concat_621")]; + tensor k_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = k_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = k_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_29_stride_0, update = linear_225_cast_fp16, x = coreml_update_state_118)[name = string("k_cache1_internal_tensor_assign_29_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_29_cast_fp16, input = k_cache1)[name = string("coreml_update_state_120_write_state")]; + tensor coreml_update_state_120 = read_state(input = k_cache1)[name = string("coreml_update_state_120")]; + tensor v_cache1_internal_tensor_assign_29_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_29_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_29_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_620, begin_mask = v_cache1_internal_tensor_assign_29_begin_mask_0, end = concat_621, end_mask = v_cache1_internal_tensor_assign_29_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_29_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_29_stride_0, update = linear_226_cast_fp16, x = coreml_update_state_119)[name = string("v_cache1_internal_tensor_assign_29_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_29_cast_fp16, input = v_cache1)[name = string("coreml_update_state_121_write_state")]; + tensor coreml_update_state_121 = read_state(input = v_cache1)[name = string("coreml_update_state_121")]; + int32 concat_626_values0_0 = const()[name = string("concat_626_values0_0"), val = int32(1)]; + int32 concat_626_values2_0 = const()[name = string("concat_626_values2_0"), val = int32(1280)]; + int32 concat_626_axis_0 = const()[name = string("concat_626_axis_0"), val = int32(0)]; + bool concat_626_interleave_0 = const()[name = string("concat_626_interleave_0"), val = bool(false)]; + tensor concat_626 = concat(axis = concat_626_axis_0, interleave = concat_626_interleave_0, values = (concat_626_values0_0, end_step_59, concat_626_values2_0))[name = string("concat_626")]; + tensor var_6144_begin_0 = const()[name = string("op_6144_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6144_end_mask_0 = const()[name = string("op_6144_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6144_cast_fp16 = slice_by_index(begin = var_6144_begin_0, end = concat_626, end_mask = var_6144_end_mask_0, x = k_cache_113_cast_fp16)[name = string("op_6144_cast_fp16")]; + tensor var_6147_begin_0 = const()[name = string("op_6147_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6147_end_mask_0 = const()[name = string("op_6147_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = concat_626, end_mask = var_6147_end_mask_0, x = v_cache_113_cast_fp16)[name = string("op_6147_cast_fp16")]; + tensor concat_628x = const()[name = string("concat_628x"), val = tensor([1, -1, 20, 64])]; + tensor var_6157_cast_fp16 = reshape(shape = concat_628x, x = linear_224_cast_fp16)[name = string("op_6157_cast_fp16")]; + tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_227_cast_fp16 = mul(x = var_6157_cast_fp16, y = const_272_to_fp16)[name = string("q_227_cast_fp16")]; + tensor concat_629x = const()[name = string("concat_629x"), val = tensor([1, -1, 20, 64])]; + tensor var_6164_cast_fp16 = reshape(shape = concat_629x, x = var_6144_cast_fp16)[name = string("op_6164_cast_fp16")]; + tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_285_cast_fp16 = mul(x = var_6164_cast_fp16, y = const_273_to_fp16)[name = string("k_285_cast_fp16")]; + tensor concat_630x = const()[name = string("concat_630x"), val = tensor([1, -1, 20, 64])]; + tensor var_6171_cast_fp16 = reshape(shape = concat_630x, x = var_6147_cast_fp16)[name = string("op_6171_cast_fp16")]; + tensor var_6172 = const()[name = string("op_6172"), val = tensor([0, 2, 1, 3])]; + bool qk_169_transpose_x_0 = const()[name = string("qk_169_transpose_x_0"), val = bool(false)]; + bool qk_169_transpose_y_0 = const()[name = string("qk_169_transpose_y_0"), val = bool(false)]; + tensor transpose_369_perm_0 = const()[name = string("transpose_369_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_370_perm_0 = const()[name = string("transpose_370_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_370 = transpose(perm = transpose_370_perm_0, x = k_285_cast_fp16)[name = string("transpose_414")]; + tensor transpose_369 = transpose(perm = transpose_369_perm_0, x = q_227_cast_fp16)[name = string("transpose_415")]; + tensor qk_169_cast_fp16 = matmul(transpose_x = qk_169_transpose_x_0, transpose_y = qk_169_transpose_y_0, x = transpose_369, y = transpose_370)[name = string("qk_169_cast_fp16")]; + int32 concat_631_values1_0 = const()[name = string("concat_631_values1_0"), val = int32(448)]; + int32 concat_631_axis_0 = const()[name = string("concat_631_axis_0"), val = int32(0)]; + bool concat_631_interleave_0 = const()[name = string("concat_631_interleave_0"), val = bool(false)]; + tensor concat_631 = concat(axis = concat_631_axis_0, interleave = concat_631_interleave_0, values = (gather_338_cast_uint16_to_int32, concat_631_values1_0))[name = string("concat_631")]; + tensor var_6175_begin_0 = const()[name = string("op_6175_begin_0"), val = tensor([0, 0])]; + tensor var_6175_end_mask_0 = const()[name = string("op_6175_end_mask_0"), val = tensor([false, true])]; + tensor var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = concat_631, end_mask = var_6175_end_mask_0, x = mask_to_fp16)[name = string("op_6175_cast_fp16")]; + int32 concat_632_values0_0 = const()[name = string("concat_632_values0_0"), val = int32(0)]; + int32 concat_632_axis_0 = const()[name = string("concat_632_axis_0"), val = int32(0)]; + bool concat_632_interleave_0 = const()[name = string("concat_632_interleave_0"), val = bool(false)]; + tensor concat_632 = concat(axis = concat_632_axis_0, interleave = concat_632_interleave_0, values = (concat_632_values0_0, gather_338_cast_uint16_to_int32))[name = string("concat_632")]; + tensor var_6176_begin_0 = const()[name = string("op_6176_begin_0"), val = tensor([0, 0])]; + tensor var_6176_end_mask_0 = const()[name = string("op_6176_end_mask_0"), val = tensor([true, false])]; + tensor var_6176_cast_fp16 = slice_by_index(begin = var_6176_begin_0, end = concat_632, end_mask = var_6176_end_mask_0, x = var_6175_cast_fp16)[name = string("op_6176_cast_fp16")]; + tensor qk_171_cast_fp16 = add(x = qk_169_cast_fp16, y = var_6176_cast_fp16)[name = string("qk_171_cast_fp16")]; + tensor var_6179_cast_fp16 = softmax(axis = var_6088, x = qk_171_cast_fp16)[name = string("op_6179_cast_fp16")]; + bool var_6181_transpose_x_0 = const()[name = string("op_6181_transpose_x_0"), val = bool(false)]; + bool var_6181_transpose_y_0 = const()[name = string("op_6181_transpose_y_0"), val = bool(false)]; + tensor v_285_cast_fp16 = transpose(perm = var_6172, x = var_6171_cast_fp16)[name = string("transpose_416")]; + tensor var_6181_cast_fp16 = matmul(transpose_x = var_6181_transpose_x_0, transpose_y = var_6181_transpose_y_0, x = var_6179_cast_fp16, y = v_285_cast_fp16)[name = string("op_6181_cast_fp16")]; + tensor var_6182 = const()[name = string("op_6182"), val = tensor([0, 2, 1, 3])]; + tensor concat_633x = const()[name = string("concat_633x"), val = tensor([1, -1, 1280])]; + tensor var_6183_cast_fp16 = transpose(perm = var_6182, x = var_6181_cast_fp16)[name = string("transpose_413")]; + tensor x_511_cast_fp16 = reshape(shape = concat_633x, x = var_6183_cast_fp16)[name = string("x_511_cast_fp16")]; + tensor var_6187_to_fp16 = const()[name = string("op_6187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1433699392)))]; + tensor var_6188_to_fp16 = const()[name = string("op_6188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436976256)))]; + tensor linear_227_cast_fp16 = linear(bias = var_6188_to_fp16, weight = var_6187_to_fp16, x = x_511_cast_fp16)[name = string("linear_227_cast_fp16")]; + tensor x_513_cast_fp16 = add(x = x_507_cast_fp16, y = linear_227_cast_fp16)[name = string("x_513_cast_fp16")]; + tensor var_6195_axes_0 = const()[name = string("op_6195_axes_0"), val = tensor([-1])]; + tensor blocks_28_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436978880)))]; + tensor blocks_28_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436981504)))]; + tensor var_6195_cast_fp16 = layer_norm(axes = var_6195_axes_0, beta = blocks_28_cross_attn_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_cross_attn_ln_weight_to_fp16, x = x_513_cast_fp16)[name = string("op_6195_cast_fp16")]; + tensor var_6204_to_fp16 = const()[name = string("op_6204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1436984128)))]; + tensor var_6205_to_fp16 = const()[name = string("op_6205_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440260992)))]; + tensor linear_228_cast_fp16 = linear(bias = var_6205_to_fp16, weight = var_6204_to_fp16, x = var_6195_cast_fp16)[name = string("linear_228_cast_fp16")]; + tensor concat_634 = const()[name = string("concat_634"), val = tensor([0, 0, 0])]; + tensor concat_635 = const()[name = string("concat_635"), val = tensor([0, 1500, 0])]; + tensor k_287_internal_tensor_assign_1_stride_0 = const()[name = string("k_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_634, begin_mask = k_287_internal_tensor_assign_1_begin_mask_0, end = concat_635, end_mask = k_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_287_internal_tensor_assign_1_squeeze_mask_0, stride = k_287_internal_tensor_assign_1_stride_0, update = k_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("k_287_internal_tensor_assign_1_cast_fp16")]; + tensor concat_636 = const()[name = string("concat_636"), val = tensor([0, 0, 0])]; + tensor concat_637 = const()[name = string("concat_637"), val = tensor([0, 1500, 0])]; + tensor v_287_internal_tensor_assign_1_stride_0 = const()[name = string("v_287_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_287_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_287_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_287_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_287_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_287_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_636, begin_mask = v_287_internal_tensor_assign_1_begin_mask_0, end = concat_637, end_mask = v_287_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_287_internal_tensor_assign_1_squeeze_mask_0, stride = v_287_internal_tensor_assign_1_stride_0, update = v_cache_115_cast_fp16, x = k_7_to_fp16)[name = string("v_287_internal_tensor_assign_1_cast_fp16")]; + tensor concat_638x = const()[name = string("concat_638x"), val = tensor([1, -1, 20, 64])]; + tensor var_6225_cast_fp16 = reshape(shape = concat_638x, x = linear_228_cast_fp16)[name = string("op_6225_cast_fp16")]; + tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_231_cast_fp16 = mul(x = var_6225_cast_fp16, y = const_274_to_fp16)[name = string("q_231_cast_fp16")]; + tensor var_6231 = const()[name = string("op_6231"), val = tensor([1, 1500, 20, -1])]; + tensor var_6232_cast_fp16 = reshape(shape = var_6231, x = k_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6232_cast_fp16")]; + tensor const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_289_cast_fp16 = mul(x = var_6232_cast_fp16, y = const_275_to_fp16)[name = string("k_289_cast_fp16")]; + tensor var_6238 = const()[name = string("op_6238"), val = tensor([1, 1500, 20, -1])]; + tensor var_6239_cast_fp16 = reshape(shape = var_6238, x = v_287_internal_tensor_assign_1_cast_fp16)[name = string("op_6239_cast_fp16")]; + tensor var_6240 = const()[name = string("op_6240"), val = tensor([0, 2, 1, 3])]; + bool qk_173_transpose_x_0 = const()[name = string("qk_173_transpose_x_0"), val = bool(false)]; + bool qk_173_transpose_y_0 = const()[name = string("qk_173_transpose_y_0"), val = bool(false)]; + tensor transpose_371_perm_0 = const()[name = string("transpose_371_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_372_perm_0 = const()[name = string("transpose_372_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_372 = transpose(perm = transpose_372_perm_0, x = k_289_cast_fp16)[name = string("transpose_410")]; + tensor transpose_371 = transpose(perm = transpose_371_perm_0, x = q_231_cast_fp16)[name = string("transpose_411")]; + tensor qk_173_cast_fp16 = matmul(transpose_x = qk_173_transpose_x_0, transpose_y = qk_173_transpose_y_0, x = transpose_371, y = transpose_372)[name = string("qk_173_cast_fp16")]; + tensor var_6244_cast_fp16 = softmax(axis = var_6088, x = qk_173_cast_fp16)[name = string("op_6244_cast_fp16")]; + bool var_6246_transpose_x_0 = const()[name = string("op_6246_transpose_x_0"), val = bool(false)]; + bool var_6246_transpose_y_0 = const()[name = string("op_6246_transpose_y_0"), val = bool(false)]; + tensor v_289_cast_fp16 = transpose(perm = var_6240, x = var_6239_cast_fp16)[name = string("transpose_412")]; + tensor var_6246_cast_fp16 = matmul(transpose_x = var_6246_transpose_x_0, transpose_y = var_6246_transpose_y_0, x = var_6244_cast_fp16, y = v_289_cast_fp16)[name = string("op_6246_cast_fp16")]; + tensor var_6247 = const()[name = string("op_6247"), val = tensor([0, 2, 1, 3])]; + tensor concat_639x = const()[name = string("concat_639x"), val = tensor([1, -1, 1280])]; + tensor var_6248_cast_fp16 = transpose(perm = var_6247, x = var_6246_cast_fp16)[name = string("transpose_409")]; + tensor x_517_cast_fp16 = reshape(shape = concat_639x, x = var_6248_cast_fp16)[name = string("x_517_cast_fp16")]; + tensor var_6252_to_fp16 = const()[name = string("op_6252_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1440263616)))]; + tensor var_6253_to_fp16 = const()[name = string("op_6253_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443540480)))]; + tensor linear_229_cast_fp16 = linear(bias = var_6253_to_fp16, weight = var_6252_to_fp16, x = x_517_cast_fp16)[name = string("linear_229_cast_fp16")]; + tensor x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_229_cast_fp16)[name = string("x_519_cast_fp16")]; + tensor var_6260_axes_0 = const()[name = string("op_6260_axes_0"), val = tensor([-1])]; + tensor blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443543104)))]; + tensor blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443545728)))]; + tensor var_6260_cast_fp16 = layer_norm(axes = var_6260_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_6094_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_519_cast_fp16)[name = string("op_6260_cast_fp16")]; + tensor var_6269_to_fp16 = const()[name = string("op_6269_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1443548352)))]; + tensor var_6270_to_fp16 = const()[name = string("op_6270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456655616)))]; + tensor linear_230_cast_fp16 = linear(bias = var_6270_to_fp16, weight = var_6269_to_fp16, x = var_6260_cast_fp16)[name = string("linear_230_cast_fp16")]; + string x_523_mode_0 = const()[name = string("x_523_mode_0"), val = string("EXACT")]; + tensor x_523_cast_fp16 = gelu(mode = x_523_mode_0, x = linear_230_cast_fp16)[name = string("x_523_cast_fp16")]; + tensor var_6275_to_fp16 = const()[name = string("op_6275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1456665920)))]; + tensor var_6276_to_fp16 = const()[name = string("op_6276_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469773184)))]; + tensor linear_231_cast_fp16 = linear(bias = var_6276_to_fp16, weight = var_6275_to_fp16, x = x_523_cast_fp16)[name = string("linear_231_cast_fp16")]; + tensor x_525_cast_fp16 = add(x = x_519_cast_fp16, y = linear_231_cast_fp16)[name = string("x_525_cast_fp16")]; + tensor k_cache_117_begin_0 = const()[name = string("k_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor k_cache_117_end_0 = const()[name = string("k_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; + tensor k_cache_117_end_mask_0 = const()[name = string("k_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_117_squeeze_mask_0 = const()[name = string("k_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_117_cast_fp16 = slice_by_index(begin = k_cache_117_begin_0, end = k_cache_117_end_0, end_mask = k_cache_117_end_mask_0, squeeze_mask = k_cache_117_squeeze_mask_0, x = coreml_update_state_120)[name = string("k_cache_117_cast_fp16")]; + tensor v_cache_117_begin_0 = const()[name = string("v_cache_117_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor v_cache_117_end_0 = const()[name = string("v_cache_117_end_0"), val = tensor([30, 1, 448, 1280])]; + tensor v_cache_117_end_mask_0 = const()[name = string("v_cache_117_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_117_squeeze_mask_0 = const()[name = string("v_cache_117_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_117_cast_fp16 = slice_by_index(begin = v_cache_117_begin_0, end = v_cache_117_end_0, end_mask = v_cache_117_end_mask_0, squeeze_mask = v_cache_117_squeeze_mask_0, x = coreml_update_state_121)[name = string("v_cache_117_cast_fp16")]; + tensor k_cache_119_begin_0 = const()[name = string("k_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor k_cache_119_end_0 = const()[name = string("k_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; + tensor k_cache_119_end_mask_0 = const()[name = string("k_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_119_squeeze_mask_0 = const()[name = string("k_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_119_cast_fp16 = slice_by_index(begin = k_cache_119_begin_0, end = k_cache_119_end_0, end_mask = k_cache_119_end_mask_0, squeeze_mask = k_cache_119_squeeze_mask_0, x = read_state_2)[name = string("k_cache_119_cast_fp16")]; + tensor v_cache_119_begin_0 = const()[name = string("v_cache_119_begin_0"), val = tensor([29, 0, 0, 0])]; + tensor v_cache_119_end_0 = const()[name = string("v_cache_119_end_0"), val = tensor([30, 1, 1500, 1280])]; + tensor v_cache_119_end_mask_0 = const()[name = string("v_cache_119_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_119_squeeze_mask_0 = const()[name = string("v_cache_119_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_119_cast_fp16 = slice_by_index(begin = v_cache_119_begin_0, end = v_cache_119_end_0, end_mask = v_cache_119_end_mask_0, squeeze_mask = v_cache_119_squeeze_mask_0, x = read_state_3)[name = string("v_cache_119_cast_fp16")]; + int32 var_6299 = const()[name = string("op_6299"), val = int32(-1)]; + tensor var_6317_axes_0 = const()[name = string("op_6317_axes_0"), val = tensor([-1])]; + tensor blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469775808)))]; + tensor blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469778432)))]; + fp16 var_6305_to_fp16 = const()[name = string("op_6305_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6317_cast_fp16 = layer_norm(axes = var_6317_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_525_cast_fp16)[name = string("op_6317_cast_fp16")]; + tensor var_6328_to_fp16 = const()[name = string("op_6328_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1469781056)))]; + tensor var_6329_to_fp16 = const()[name = string("op_6329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473057920)))]; + tensor linear_232_cast_fp16 = linear(bias = var_6329_to_fp16, weight = var_6328_to_fp16, x = var_6317_cast_fp16)[name = string("linear_232_cast_fp16")]; + tensor var_6332_to_fp16 = const()[name = string("op_6332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1473060544)))]; + tensor linear_233_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6332_to_fp16, x = var_6317_cast_fp16)[name = string("linear_233_cast_fp16")]; + tensor var_6336_to_fp16 = const()[name = string("op_6336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1476337408)))]; + tensor var_6337_to_fp16 = const()[name = string("op_6337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479614272)))]; + tensor linear_234_cast_fp16 = linear(bias = var_6337_to_fp16, weight = var_6336_to_fp16, x = var_6317_cast_fp16)[name = string("linear_234_cast_fp16")]; + tensor var_6339_shape_cast_fp16 = shape(x = linear_232_cast_fp16)[name = string("op_6339_shape_cast_fp16")]; + int32 gather_350_axis_0 = const()[name = string("gather_350_axis_0"), val = int32(0)]; + int32 gather_350_batch_dims_0 = const()[name = string("gather_350_batch_dims_0"), val = int32(0)]; + bool gather_350_validate_indices_0 = const()[name = string("gather_350_validate_indices_0"), val = bool(false)]; + string var_6339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_350_to_uint16 = const()[name = string("select_350_to_uint16"), val = uint16(1)]; + tensor var_6339_shape_cast_fp16_to_uint16 = cast(dtype = var_6339_shape_cast_fp16_to_uint16_dtype_0, x = var_6339_shape_cast_fp16)[name = string("cast_332")]; + uint16 gather_350_cast_uint16 = gather(axis = gather_350_axis_0, batch_dims = gather_350_batch_dims_0, indices = select_350_to_uint16, validate_indices = gather_350_validate_indices_0, x = var_6339_shape_cast_fp16_to_uint16)[name = string("gather_350_cast_uint16")]; + string gather_350_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_350_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_350_cast_uint16_to_int32 = cast(dtype = gather_350_cast_uint16_to_int32_dtype_0, x = gather_350_cast_uint16)[name = string("cast_331")]; + int32 end_step_61 = add(x = offset, y = gather_350_cast_uint16_to_int32)[name = string("end_step_61")]; + tensor expand_dims_464 = const()[name = string("expand_dims_464"), val = tensor([0])]; + tensor expand_dims_466 = const()[name = string("expand_dims_466"), val = tensor([0])]; + tensor expand_dims_467_axes_0 = const()[name = string("expand_dims_467_axes_0"), val = tensor([0])]; + tensor expand_dims_467 = expand_dims(axes = expand_dims_467_axes_0, x = end_step_61)[name = string("expand_dims_467")]; + tensor concat_642_values0_0 = const()[name = string("concat_642_values0_0"), val = tensor([29])]; + int32 concat_642_axis_0 = const()[name = string("concat_642_axis_0"), val = int32(0)]; + bool concat_642_interleave_0 = const()[name = string("concat_642_interleave_0"), val = bool(false)]; + tensor concat_642 = concat(axis = concat_642_axis_0, interleave = concat_642_interleave_0, values = (concat_642_values0_0, expand_dims_464, expand_dims_1, expand_dims_466))[name = string("concat_642")]; + tensor concat_643_values0_0 = const()[name = string("concat_643_values0_0"), val = tensor([0])]; + tensor concat_643_values1_0 = const()[name = string("concat_643_values1_0"), val = tensor([0])]; + tensor concat_643_values3_0 = const()[name = string("concat_643_values3_0"), val = tensor([0])]; + int32 concat_643_axis_0 = const()[name = string("concat_643_axis_0"), val = int32(0)]; + bool concat_643_interleave_0 = const()[name = string("concat_643_interleave_0"), val = bool(false)]; + tensor concat_643 = concat(axis = concat_643_axis_0, interleave = concat_643_interleave_0, values = (concat_643_values0_0, concat_643_values1_0, expand_dims_467, concat_643_values3_0))[name = string("concat_643")]; + tensor k_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = k_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = k_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_30_stride_0, update = linear_233_cast_fp16, x = coreml_update_state_120)[name = string("k_cache1_internal_tensor_assign_30_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_30_cast_fp16, input = k_cache1)[name = string("coreml_update_state_122_write_state")]; + tensor coreml_update_state_122 = read_state(input = k_cache1)[name = string("coreml_update_state_122")]; + tensor v_cache1_internal_tensor_assign_30_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_30_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_30_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_642, begin_mask = v_cache1_internal_tensor_assign_30_begin_mask_0, end = concat_643, end_mask = v_cache1_internal_tensor_assign_30_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_30_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_30_stride_0, update = linear_234_cast_fp16, x = coreml_update_state_121)[name = string("v_cache1_internal_tensor_assign_30_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_30_cast_fp16, input = v_cache1)[name = string("coreml_update_state_123_write_state")]; + tensor coreml_update_state_123 = read_state(input = v_cache1)[name = string("coreml_update_state_123")]; + int32 concat_648_values0_0 = const()[name = string("concat_648_values0_0"), val = int32(1)]; + int32 concat_648_values2_0 = const()[name = string("concat_648_values2_0"), val = int32(1280)]; + int32 concat_648_axis_0 = const()[name = string("concat_648_axis_0"), val = int32(0)]; + bool concat_648_interleave_0 = const()[name = string("concat_648_interleave_0"), val = bool(false)]; + tensor concat_648 = concat(axis = concat_648_axis_0, interleave = concat_648_interleave_0, values = (concat_648_values0_0, end_step_61, concat_648_values2_0))[name = string("concat_648")]; + tensor var_6355_begin_0 = const()[name = string("op_6355_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6355_end_mask_0 = const()[name = string("op_6355_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = concat_648, end_mask = var_6355_end_mask_0, x = k_cache_117_cast_fp16)[name = string("op_6355_cast_fp16")]; + tensor var_6358_begin_0 = const()[name = string("op_6358_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6358_end_mask_0 = const()[name = string("op_6358_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6358_cast_fp16 = slice_by_index(begin = var_6358_begin_0, end = concat_648, end_mask = var_6358_end_mask_0, x = v_cache_117_cast_fp16)[name = string("op_6358_cast_fp16")]; + tensor concat_650x = const()[name = string("concat_650x"), val = tensor([1, -1, 20, 64])]; + tensor var_6368_cast_fp16 = reshape(shape = concat_650x, x = linear_232_cast_fp16)[name = string("op_6368_cast_fp16")]; + tensor const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_235_cast_fp16 = mul(x = var_6368_cast_fp16, y = const_276_to_fp16)[name = string("q_235_cast_fp16")]; + tensor concat_651x = const()[name = string("concat_651x"), val = tensor([1, -1, 20, 64])]; + tensor var_6375_cast_fp16 = reshape(shape = concat_651x, x = var_6355_cast_fp16)[name = string("op_6375_cast_fp16")]; + tensor const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_295_cast_fp16 = mul(x = var_6375_cast_fp16, y = const_277_to_fp16)[name = string("k_295_cast_fp16")]; + tensor concat_652x = const()[name = string("concat_652x"), val = tensor([1, -1, 20, 64])]; + tensor var_6382_cast_fp16 = reshape(shape = concat_652x, x = var_6358_cast_fp16)[name = string("op_6382_cast_fp16")]; + tensor var_6383 = const()[name = string("op_6383"), val = tensor([0, 2, 1, 3])]; + bool qk_175_transpose_x_0 = const()[name = string("qk_175_transpose_x_0"), val = bool(false)]; + bool qk_175_transpose_y_0 = const()[name = string("qk_175_transpose_y_0"), val = bool(false)]; + tensor transpose_373_perm_0 = const()[name = string("transpose_373_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_374_perm_0 = const()[name = string("transpose_374_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_374 = transpose(perm = transpose_374_perm_0, x = k_295_cast_fp16)[name = string("transpose_406")]; + tensor transpose_373 = transpose(perm = transpose_373_perm_0, x = q_235_cast_fp16)[name = string("transpose_407")]; + tensor qk_175_cast_fp16 = matmul(transpose_x = qk_175_transpose_x_0, transpose_y = qk_175_transpose_y_0, x = transpose_373, y = transpose_374)[name = string("qk_175_cast_fp16")]; + int32 concat_653_values1_0 = const()[name = string("concat_653_values1_0"), val = int32(448)]; + int32 concat_653_axis_0 = const()[name = string("concat_653_axis_0"), val = int32(0)]; + bool concat_653_interleave_0 = const()[name = string("concat_653_interleave_0"), val = bool(false)]; + tensor concat_653 = concat(axis = concat_653_axis_0, interleave = concat_653_interleave_0, values = (gather_350_cast_uint16_to_int32, concat_653_values1_0))[name = string("concat_653")]; + tensor var_6386_begin_0 = const()[name = string("op_6386_begin_0"), val = tensor([0, 0])]; + tensor var_6386_end_mask_0 = const()[name = string("op_6386_end_mask_0"), val = tensor([false, true])]; + tensor var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = concat_653, end_mask = var_6386_end_mask_0, x = mask_to_fp16)[name = string("op_6386_cast_fp16")]; + int32 concat_654_values0_0 = const()[name = string("concat_654_values0_0"), val = int32(0)]; + int32 concat_654_axis_0 = const()[name = string("concat_654_axis_0"), val = int32(0)]; + bool concat_654_interleave_0 = const()[name = string("concat_654_interleave_0"), val = bool(false)]; + tensor concat_654 = concat(axis = concat_654_axis_0, interleave = concat_654_interleave_0, values = (concat_654_values0_0, gather_350_cast_uint16_to_int32))[name = string("concat_654")]; + tensor var_6387_begin_0 = const()[name = string("op_6387_begin_0"), val = tensor([0, 0])]; + tensor var_6387_end_mask_0 = const()[name = string("op_6387_end_mask_0"), val = tensor([true, false])]; + tensor var_6387_cast_fp16 = slice_by_index(begin = var_6387_begin_0, end = concat_654, end_mask = var_6387_end_mask_0, x = var_6386_cast_fp16)[name = string("op_6387_cast_fp16")]; + tensor qk_177_cast_fp16 = add(x = qk_175_cast_fp16, y = var_6387_cast_fp16)[name = string("qk_177_cast_fp16")]; + tensor var_6390_cast_fp16 = softmax(axis = var_6299, x = qk_177_cast_fp16)[name = string("op_6390_cast_fp16")]; + bool var_6392_transpose_x_0 = const()[name = string("op_6392_transpose_x_0"), val = bool(false)]; + bool var_6392_transpose_y_0 = const()[name = string("op_6392_transpose_y_0"), val = bool(false)]; + tensor v_295_cast_fp16 = transpose(perm = var_6383, x = var_6382_cast_fp16)[name = string("transpose_408")]; + tensor var_6392_cast_fp16 = matmul(transpose_x = var_6392_transpose_x_0, transpose_y = var_6392_transpose_y_0, x = var_6390_cast_fp16, y = v_295_cast_fp16)[name = string("op_6392_cast_fp16")]; + tensor var_6393 = const()[name = string("op_6393"), val = tensor([0, 2, 1, 3])]; + tensor concat_655x = const()[name = string("concat_655x"), val = tensor([1, -1, 1280])]; + tensor var_6394_cast_fp16 = transpose(perm = var_6393, x = var_6392_cast_fp16)[name = string("transpose_405")]; + tensor x_529_cast_fp16 = reshape(shape = concat_655x, x = var_6394_cast_fp16)[name = string("x_529_cast_fp16")]; + tensor var_6398_to_fp16 = const()[name = string("op_6398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1479616896)))]; + tensor var_6399_to_fp16 = const()[name = string("op_6399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482893760)))]; + tensor linear_235_cast_fp16 = linear(bias = var_6399_to_fp16, weight = var_6398_to_fp16, x = x_529_cast_fp16)[name = string("linear_235_cast_fp16")]; + tensor x_531_cast_fp16 = add(x = x_525_cast_fp16, y = linear_235_cast_fp16)[name = string("x_531_cast_fp16")]; + tensor var_6406_axes_0 = const()[name = string("op_6406_axes_0"), val = tensor([-1])]; + tensor blocks_29_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482896384)))]; + tensor blocks_29_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482899008)))]; + tensor var_6406_cast_fp16 = layer_norm(axes = var_6406_axes_0, beta = blocks_29_cross_attn_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_cross_attn_ln_weight_to_fp16, x = x_531_cast_fp16)[name = string("op_6406_cast_fp16")]; + tensor var_6415_to_fp16 = const()[name = string("op_6415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1482901632)))]; + tensor var_6416_to_fp16 = const()[name = string("op_6416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486178496)))]; + tensor linear_236_cast_fp16 = linear(bias = var_6416_to_fp16, weight = var_6415_to_fp16, x = var_6406_cast_fp16)[name = string("linear_236_cast_fp16")]; + tensor concat_656 = const()[name = string("concat_656"), val = tensor([0, 0, 0])]; + tensor concat_657 = const()[name = string("concat_657"), val = tensor([0, 1500, 0])]; + tensor k_297_internal_tensor_assign_1_stride_0 = const()[name = string("k_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_656, begin_mask = k_297_internal_tensor_assign_1_begin_mask_0, end = concat_657, end_mask = k_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_297_internal_tensor_assign_1_squeeze_mask_0, stride = k_297_internal_tensor_assign_1_stride_0, update = k_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("k_297_internal_tensor_assign_1_cast_fp16")]; + tensor concat_658 = const()[name = string("concat_658"), val = tensor([0, 0, 0])]; + tensor concat_659 = const()[name = string("concat_659"), val = tensor([0, 1500, 0])]; + tensor v_297_internal_tensor_assign_1_stride_0 = const()[name = string("v_297_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_297_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_297_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_297_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_297_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_297_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_658, begin_mask = v_297_internal_tensor_assign_1_begin_mask_0, end = concat_659, end_mask = v_297_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_297_internal_tensor_assign_1_squeeze_mask_0, stride = v_297_internal_tensor_assign_1_stride_0, update = v_cache_119_cast_fp16, x = k_7_to_fp16)[name = string("v_297_internal_tensor_assign_1_cast_fp16")]; + tensor concat_660x = const()[name = string("concat_660x"), val = tensor([1, -1, 20, 64])]; + tensor var_6436_cast_fp16 = reshape(shape = concat_660x, x = linear_236_cast_fp16)[name = string("op_6436_cast_fp16")]; + tensor const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_239_cast_fp16 = mul(x = var_6436_cast_fp16, y = const_278_to_fp16)[name = string("q_239_cast_fp16")]; + tensor var_6442 = const()[name = string("op_6442"), val = tensor([1, 1500, 20, -1])]; + tensor var_6443_cast_fp16 = reshape(shape = var_6442, x = k_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6443_cast_fp16")]; + tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_299_cast_fp16 = mul(x = var_6443_cast_fp16, y = const_279_to_fp16)[name = string("k_299_cast_fp16")]; + tensor var_6449 = const()[name = string("op_6449"), val = tensor([1, 1500, 20, -1])]; + tensor var_6450_cast_fp16 = reshape(shape = var_6449, x = v_297_internal_tensor_assign_1_cast_fp16)[name = string("op_6450_cast_fp16")]; + tensor var_6451 = const()[name = string("op_6451"), val = tensor([0, 2, 1, 3])]; + bool qk_179_transpose_x_0 = const()[name = string("qk_179_transpose_x_0"), val = bool(false)]; + bool qk_179_transpose_y_0 = const()[name = string("qk_179_transpose_y_0"), val = bool(false)]; + tensor transpose_375_perm_0 = const()[name = string("transpose_375_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_376_perm_0 = const()[name = string("transpose_376_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_376 = transpose(perm = transpose_376_perm_0, x = k_299_cast_fp16)[name = string("transpose_402")]; + tensor transpose_375 = transpose(perm = transpose_375_perm_0, x = q_239_cast_fp16)[name = string("transpose_403")]; + tensor qk_179_cast_fp16 = matmul(transpose_x = qk_179_transpose_x_0, transpose_y = qk_179_transpose_y_0, x = transpose_375, y = transpose_376)[name = string("qk_179_cast_fp16")]; + tensor var_6455_cast_fp16 = softmax(axis = var_6299, x = qk_179_cast_fp16)[name = string("op_6455_cast_fp16")]; + bool var_6457_transpose_x_0 = const()[name = string("op_6457_transpose_x_0"), val = bool(false)]; + bool var_6457_transpose_y_0 = const()[name = string("op_6457_transpose_y_0"), val = bool(false)]; + tensor v_299_cast_fp16 = transpose(perm = var_6451, x = var_6450_cast_fp16)[name = string("transpose_404")]; + tensor var_6457_cast_fp16 = matmul(transpose_x = var_6457_transpose_x_0, transpose_y = var_6457_transpose_y_0, x = var_6455_cast_fp16, y = v_299_cast_fp16)[name = string("op_6457_cast_fp16")]; + tensor var_6458 = const()[name = string("op_6458"), val = tensor([0, 2, 1, 3])]; + tensor concat_661x = const()[name = string("concat_661x"), val = tensor([1, -1, 1280])]; + tensor var_6459_cast_fp16 = transpose(perm = var_6458, x = var_6457_cast_fp16)[name = string("transpose_401")]; + tensor x_535_cast_fp16 = reshape(shape = concat_661x, x = var_6459_cast_fp16)[name = string("x_535_cast_fp16")]; + tensor var_6463_to_fp16 = const()[name = string("op_6463_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1486181120)))]; + tensor var_6464_to_fp16 = const()[name = string("op_6464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489457984)))]; + tensor linear_237_cast_fp16 = linear(bias = var_6464_to_fp16, weight = var_6463_to_fp16, x = x_535_cast_fp16)[name = string("linear_237_cast_fp16")]; + tensor x_537_cast_fp16 = add(x = x_531_cast_fp16, y = linear_237_cast_fp16)[name = string("x_537_cast_fp16")]; + tensor var_6471_axes_0 = const()[name = string("op_6471_axes_0"), val = tensor([-1])]; + tensor blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489460608)))]; + tensor blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489463232)))]; + tensor var_6471_cast_fp16 = layer_norm(axes = var_6471_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_6305_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_537_cast_fp16)[name = string("op_6471_cast_fp16")]; + tensor var_6480_to_fp16 = const()[name = string("op_6480_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489465856)))]; + tensor var_6481_to_fp16 = const()[name = string("op_6481_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502573120)))]; + tensor linear_238_cast_fp16 = linear(bias = var_6481_to_fp16, weight = var_6480_to_fp16, x = var_6471_cast_fp16)[name = string("linear_238_cast_fp16")]; + string x_541_mode_0 = const()[name = string("x_541_mode_0"), val = string("EXACT")]; + tensor x_541_cast_fp16 = gelu(mode = x_541_mode_0, x = linear_238_cast_fp16)[name = string("x_541_cast_fp16")]; + tensor var_6486_to_fp16 = const()[name = string("op_6486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1502583424)))]; + tensor var_6487_to_fp16 = const()[name = string("op_6487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515690688)))]; + tensor linear_239_cast_fp16 = linear(bias = var_6487_to_fp16, weight = var_6486_to_fp16, x = x_541_cast_fp16)[name = string("linear_239_cast_fp16")]; + tensor x_543_cast_fp16 = add(x = x_537_cast_fp16, y = linear_239_cast_fp16)[name = string("x_543_cast_fp16")]; + tensor k_cache_121_begin_0 = const()[name = string("k_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor k_cache_121_end_0 = const()[name = string("k_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; + tensor k_cache_121_end_mask_0 = const()[name = string("k_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_121_squeeze_mask_0 = const()[name = string("k_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_121_cast_fp16 = slice_by_index(begin = k_cache_121_begin_0, end = k_cache_121_end_0, end_mask = k_cache_121_end_mask_0, squeeze_mask = k_cache_121_squeeze_mask_0, x = coreml_update_state_122)[name = string("k_cache_121_cast_fp16")]; + tensor v_cache_121_begin_0 = const()[name = string("v_cache_121_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor v_cache_121_end_0 = const()[name = string("v_cache_121_end_0"), val = tensor([31, 1, 448, 1280])]; + tensor v_cache_121_end_mask_0 = const()[name = string("v_cache_121_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_121_squeeze_mask_0 = const()[name = string("v_cache_121_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_121_cast_fp16 = slice_by_index(begin = v_cache_121_begin_0, end = v_cache_121_end_0, end_mask = v_cache_121_end_mask_0, squeeze_mask = v_cache_121_squeeze_mask_0, x = coreml_update_state_123)[name = string("v_cache_121_cast_fp16")]; + tensor k_cache_123_begin_0 = const()[name = string("k_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor k_cache_123_end_0 = const()[name = string("k_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; + tensor k_cache_123_end_mask_0 = const()[name = string("k_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_123_squeeze_mask_0 = const()[name = string("k_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_123_cast_fp16 = slice_by_index(begin = k_cache_123_begin_0, end = k_cache_123_end_0, end_mask = k_cache_123_end_mask_0, squeeze_mask = k_cache_123_squeeze_mask_0, x = read_state_2)[name = string("k_cache_123_cast_fp16")]; + tensor v_cache_123_begin_0 = const()[name = string("v_cache_123_begin_0"), val = tensor([30, 0, 0, 0])]; + tensor v_cache_123_end_0 = const()[name = string("v_cache_123_end_0"), val = tensor([31, 1, 1500, 1280])]; + tensor v_cache_123_end_mask_0 = const()[name = string("v_cache_123_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_123_squeeze_mask_0 = const()[name = string("v_cache_123_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_123_cast_fp16 = slice_by_index(begin = v_cache_123_begin_0, end = v_cache_123_end_0, end_mask = v_cache_123_end_mask_0, squeeze_mask = v_cache_123_squeeze_mask_0, x = read_state_3)[name = string("v_cache_123_cast_fp16")]; + int32 var_6510 = const()[name = string("op_6510"), val = int32(-1)]; + tensor var_6528_axes_0 = const()[name = string("op_6528_axes_0"), val = tensor([-1])]; + tensor blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515693312)))]; + tensor blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515695936)))]; + fp16 var_6516_to_fp16 = const()[name = string("op_6516_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6528_cast_fp16 = layer_norm(axes = var_6528_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_543_cast_fp16)[name = string("op_6528_cast_fp16")]; + tensor var_6539_to_fp16 = const()[name = string("op_6539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1515698560)))]; + tensor var_6540_to_fp16 = const()[name = string("op_6540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518975424)))]; + tensor linear_240_cast_fp16 = linear(bias = var_6540_to_fp16, weight = var_6539_to_fp16, x = var_6528_cast_fp16)[name = string("linear_240_cast_fp16")]; + tensor var_6543_to_fp16 = const()[name = string("op_6543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1518978048)))]; + tensor linear_241_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6543_to_fp16, x = var_6528_cast_fp16)[name = string("linear_241_cast_fp16")]; + tensor var_6547_to_fp16 = const()[name = string("op_6547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1522254912)))]; + tensor var_6548_to_fp16 = const()[name = string("op_6548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525531776)))]; + tensor linear_242_cast_fp16 = linear(bias = var_6548_to_fp16, weight = var_6547_to_fp16, x = var_6528_cast_fp16)[name = string("linear_242_cast_fp16")]; + tensor var_6550_shape_cast_fp16 = shape(x = linear_240_cast_fp16)[name = string("op_6550_shape_cast_fp16")]; + int32 gather_362_axis_0 = const()[name = string("gather_362_axis_0"), val = int32(0)]; + int32 gather_362_batch_dims_0 = const()[name = string("gather_362_batch_dims_0"), val = int32(0)]; + bool gather_362_validate_indices_0 = const()[name = string("gather_362_validate_indices_0"), val = bool(false)]; + string var_6550_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6550_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_362_to_uint16 = const()[name = string("select_362_to_uint16"), val = uint16(1)]; + tensor var_6550_shape_cast_fp16_to_uint16 = cast(dtype = var_6550_shape_cast_fp16_to_uint16_dtype_0, x = var_6550_shape_cast_fp16)[name = string("cast_330")]; + uint16 gather_362_cast_uint16 = gather(axis = gather_362_axis_0, batch_dims = gather_362_batch_dims_0, indices = select_362_to_uint16, validate_indices = gather_362_validate_indices_0, x = var_6550_shape_cast_fp16_to_uint16)[name = string("gather_362_cast_uint16")]; + string gather_362_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_362_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_362_cast_uint16_to_int32 = cast(dtype = gather_362_cast_uint16_to_int32_dtype_0, x = gather_362_cast_uint16)[name = string("cast_329")]; + int32 end_step_63 = add(x = offset, y = gather_362_cast_uint16_to_int32)[name = string("end_step_63")]; + tensor expand_dims_480 = const()[name = string("expand_dims_480"), val = tensor([0])]; + tensor expand_dims_482 = const()[name = string("expand_dims_482"), val = tensor([0])]; + tensor expand_dims_483_axes_0 = const()[name = string("expand_dims_483_axes_0"), val = tensor([0])]; + tensor expand_dims_483 = expand_dims(axes = expand_dims_483_axes_0, x = end_step_63)[name = string("expand_dims_483")]; + tensor concat_664_values0_0 = const()[name = string("concat_664_values0_0"), val = tensor([30])]; + int32 concat_664_axis_0 = const()[name = string("concat_664_axis_0"), val = int32(0)]; + bool concat_664_interleave_0 = const()[name = string("concat_664_interleave_0"), val = bool(false)]; + tensor concat_664 = concat(axis = concat_664_axis_0, interleave = concat_664_interleave_0, values = (concat_664_values0_0, expand_dims_480, expand_dims_1, expand_dims_482))[name = string("concat_664")]; + tensor concat_665_values0_0 = const()[name = string("concat_665_values0_0"), val = tensor([0])]; + tensor concat_665_values1_0 = const()[name = string("concat_665_values1_0"), val = tensor([0])]; + tensor concat_665_values3_0 = const()[name = string("concat_665_values3_0"), val = tensor([0])]; + int32 concat_665_axis_0 = const()[name = string("concat_665_axis_0"), val = int32(0)]; + bool concat_665_interleave_0 = const()[name = string("concat_665_interleave_0"), val = bool(false)]; + tensor concat_665 = concat(axis = concat_665_axis_0, interleave = concat_665_interleave_0, values = (concat_665_values0_0, concat_665_values1_0, expand_dims_483, concat_665_values3_0))[name = string("concat_665")]; + tensor k_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = k_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = k_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_31_stride_0, update = linear_241_cast_fp16, x = coreml_update_state_122)[name = string("k_cache1_internal_tensor_assign_31_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_31_cast_fp16, input = k_cache1)[name = string("coreml_update_state_124_write_state")]; + tensor coreml_update_state_124 = read_state(input = k_cache1)[name = string("coreml_update_state_124")]; + tensor v_cache1_internal_tensor_assign_31_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_31_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_31_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_31_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_31_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_31_cast_fp16 = slice_update(begin = concat_664, begin_mask = v_cache1_internal_tensor_assign_31_begin_mask_0, end = concat_665, end_mask = v_cache1_internal_tensor_assign_31_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_31_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_31_stride_0, update = linear_242_cast_fp16, x = coreml_update_state_123)[name = string("v_cache1_internal_tensor_assign_31_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_31_cast_fp16, input = v_cache1)[name = string("coreml_update_state_125_write_state")]; + tensor coreml_update_state_125 = read_state(input = v_cache1)[name = string("coreml_update_state_125")]; + int32 concat_670_values0_0 = const()[name = string("concat_670_values0_0"), val = int32(1)]; + int32 concat_670_values2_0 = const()[name = string("concat_670_values2_0"), val = int32(1280)]; + int32 concat_670_axis_0 = const()[name = string("concat_670_axis_0"), val = int32(0)]; + bool concat_670_interleave_0 = const()[name = string("concat_670_interleave_0"), val = bool(false)]; + tensor concat_670 = concat(axis = concat_670_axis_0, interleave = concat_670_interleave_0, values = (concat_670_values0_0, end_step_63, concat_670_values2_0))[name = string("concat_670")]; + tensor var_6566_begin_0 = const()[name = string("op_6566_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6566_end_mask_0 = const()[name = string("op_6566_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = concat_670, end_mask = var_6566_end_mask_0, x = k_cache_121_cast_fp16)[name = string("op_6566_cast_fp16")]; + tensor var_6569_begin_0 = const()[name = string("op_6569_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6569_end_mask_0 = const()[name = string("op_6569_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6569_cast_fp16 = slice_by_index(begin = var_6569_begin_0, end = concat_670, end_mask = var_6569_end_mask_0, x = v_cache_121_cast_fp16)[name = string("op_6569_cast_fp16")]; + tensor concat_672x = const()[name = string("concat_672x"), val = tensor([1, -1, 20, 64])]; + tensor var_6579_cast_fp16 = reshape(shape = concat_672x, x = linear_240_cast_fp16)[name = string("op_6579_cast_fp16")]; + tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_243_cast_fp16 = mul(x = var_6579_cast_fp16, y = const_280_to_fp16)[name = string("q_243_cast_fp16")]; + tensor concat_673x = const()[name = string("concat_673x"), val = tensor([1, -1, 20, 64])]; + tensor var_6586_cast_fp16 = reshape(shape = concat_673x, x = var_6566_cast_fp16)[name = string("op_6586_cast_fp16")]; + tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_305_cast_fp16 = mul(x = var_6586_cast_fp16, y = const_281_to_fp16)[name = string("k_305_cast_fp16")]; + tensor concat_674x = const()[name = string("concat_674x"), val = tensor([1, -1, 20, 64])]; + tensor var_6593_cast_fp16 = reshape(shape = concat_674x, x = var_6569_cast_fp16)[name = string("op_6593_cast_fp16")]; + tensor var_6594 = const()[name = string("op_6594"), val = tensor([0, 2, 1, 3])]; + bool qk_181_transpose_x_0 = const()[name = string("qk_181_transpose_x_0"), val = bool(false)]; + bool qk_181_transpose_y_0 = const()[name = string("qk_181_transpose_y_0"), val = bool(false)]; + tensor transpose_377_perm_0 = const()[name = string("transpose_377_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_378_perm_0 = const()[name = string("transpose_378_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_378 = transpose(perm = transpose_378_perm_0, x = k_305_cast_fp16)[name = string("transpose_398")]; + tensor transpose_377 = transpose(perm = transpose_377_perm_0, x = q_243_cast_fp16)[name = string("transpose_399")]; + tensor qk_181_cast_fp16 = matmul(transpose_x = qk_181_transpose_x_0, transpose_y = qk_181_transpose_y_0, x = transpose_377, y = transpose_378)[name = string("qk_181_cast_fp16")]; + int32 concat_675_values1_0 = const()[name = string("concat_675_values1_0"), val = int32(448)]; + int32 concat_675_axis_0 = const()[name = string("concat_675_axis_0"), val = int32(0)]; + bool concat_675_interleave_0 = const()[name = string("concat_675_interleave_0"), val = bool(false)]; + tensor concat_675 = concat(axis = concat_675_axis_0, interleave = concat_675_interleave_0, values = (gather_362_cast_uint16_to_int32, concat_675_values1_0))[name = string("concat_675")]; + tensor var_6597_begin_0 = const()[name = string("op_6597_begin_0"), val = tensor([0, 0])]; + tensor var_6597_end_mask_0 = const()[name = string("op_6597_end_mask_0"), val = tensor([false, true])]; + tensor var_6597_cast_fp16 = slice_by_index(begin = var_6597_begin_0, end = concat_675, end_mask = var_6597_end_mask_0, x = mask_to_fp16)[name = string("op_6597_cast_fp16")]; + int32 concat_676_values0_0 = const()[name = string("concat_676_values0_0"), val = int32(0)]; + int32 concat_676_axis_0 = const()[name = string("concat_676_axis_0"), val = int32(0)]; + bool concat_676_interleave_0 = const()[name = string("concat_676_interleave_0"), val = bool(false)]; + tensor concat_676 = concat(axis = concat_676_axis_0, interleave = concat_676_interleave_0, values = (concat_676_values0_0, gather_362_cast_uint16_to_int32))[name = string("concat_676")]; + tensor var_6598_begin_0 = const()[name = string("op_6598_begin_0"), val = tensor([0, 0])]; + tensor var_6598_end_mask_0 = const()[name = string("op_6598_end_mask_0"), val = tensor([true, false])]; + tensor var_6598_cast_fp16 = slice_by_index(begin = var_6598_begin_0, end = concat_676, end_mask = var_6598_end_mask_0, x = var_6597_cast_fp16)[name = string("op_6598_cast_fp16")]; + tensor qk_183_cast_fp16 = add(x = qk_181_cast_fp16, y = var_6598_cast_fp16)[name = string("qk_183_cast_fp16")]; + tensor var_6601_cast_fp16 = softmax(axis = var_6510, x = qk_183_cast_fp16)[name = string("op_6601_cast_fp16")]; + bool var_6603_transpose_x_0 = const()[name = string("op_6603_transpose_x_0"), val = bool(false)]; + bool var_6603_transpose_y_0 = const()[name = string("op_6603_transpose_y_0"), val = bool(false)]; + tensor v_305_cast_fp16 = transpose(perm = var_6594, x = var_6593_cast_fp16)[name = string("transpose_400")]; + tensor var_6603_cast_fp16 = matmul(transpose_x = var_6603_transpose_x_0, transpose_y = var_6603_transpose_y_0, x = var_6601_cast_fp16, y = v_305_cast_fp16)[name = string("op_6603_cast_fp16")]; + tensor var_6604 = const()[name = string("op_6604"), val = tensor([0, 2, 1, 3])]; + tensor concat_677x = const()[name = string("concat_677x"), val = tensor([1, -1, 1280])]; + tensor var_6605_cast_fp16 = transpose(perm = var_6604, x = var_6603_cast_fp16)[name = string("transpose_397")]; + tensor x_547_cast_fp16 = reshape(shape = concat_677x, x = var_6605_cast_fp16)[name = string("x_547_cast_fp16")]; + tensor var_6609_to_fp16 = const()[name = string("op_6609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1525534400)))]; + tensor var_6610_to_fp16 = const()[name = string("op_6610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528811264)))]; + tensor linear_243_cast_fp16 = linear(bias = var_6610_to_fp16, weight = var_6609_to_fp16, x = x_547_cast_fp16)[name = string("linear_243_cast_fp16")]; + tensor x_549_cast_fp16 = add(x = x_543_cast_fp16, y = linear_243_cast_fp16)[name = string("x_549_cast_fp16")]; + tensor var_6617_axes_0 = const()[name = string("op_6617_axes_0"), val = tensor([-1])]; + tensor blocks_30_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528813888)))]; + tensor blocks_30_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528816512)))]; + tensor var_6617_cast_fp16 = layer_norm(axes = var_6617_axes_0, beta = blocks_30_cross_attn_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_cross_attn_ln_weight_to_fp16, x = x_549_cast_fp16)[name = string("op_6617_cast_fp16")]; + tensor var_6626_to_fp16 = const()[name = string("op_6626_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1528819136)))]; + tensor var_6627_to_fp16 = const()[name = string("op_6627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532096000)))]; + tensor linear_244_cast_fp16 = linear(bias = var_6627_to_fp16, weight = var_6626_to_fp16, x = var_6617_cast_fp16)[name = string("linear_244_cast_fp16")]; + tensor concat_678 = const()[name = string("concat_678"), val = tensor([0, 0, 0])]; + tensor concat_679 = const()[name = string("concat_679"), val = tensor([0, 1500, 0])]; + tensor k_307_internal_tensor_assign_1_stride_0 = const()[name = string("k_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_678, begin_mask = k_307_internal_tensor_assign_1_begin_mask_0, end = concat_679, end_mask = k_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_307_internal_tensor_assign_1_squeeze_mask_0, stride = k_307_internal_tensor_assign_1_stride_0, update = k_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("k_307_internal_tensor_assign_1_cast_fp16")]; + tensor concat_680 = const()[name = string("concat_680"), val = tensor([0, 0, 0])]; + tensor concat_681 = const()[name = string("concat_681"), val = tensor([0, 1500, 0])]; + tensor v_307_internal_tensor_assign_1_stride_0 = const()[name = string("v_307_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_307_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_307_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_307_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_307_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_307_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_680, begin_mask = v_307_internal_tensor_assign_1_begin_mask_0, end = concat_681, end_mask = v_307_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_307_internal_tensor_assign_1_squeeze_mask_0, stride = v_307_internal_tensor_assign_1_stride_0, update = v_cache_123_cast_fp16, x = k_7_to_fp16)[name = string("v_307_internal_tensor_assign_1_cast_fp16")]; + tensor concat_682x = const()[name = string("concat_682x"), val = tensor([1, -1, 20, 64])]; + tensor var_6647_cast_fp16 = reshape(shape = concat_682x, x = linear_244_cast_fp16)[name = string("op_6647_cast_fp16")]; + tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_247_cast_fp16 = mul(x = var_6647_cast_fp16, y = const_282_to_fp16)[name = string("q_247_cast_fp16")]; + tensor var_6653 = const()[name = string("op_6653"), val = tensor([1, 1500, 20, -1])]; + tensor var_6654_cast_fp16 = reshape(shape = var_6653, x = k_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6654_cast_fp16")]; + tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_309_cast_fp16 = mul(x = var_6654_cast_fp16, y = const_283_to_fp16)[name = string("k_309_cast_fp16")]; + tensor var_6660 = const()[name = string("op_6660"), val = tensor([1, 1500, 20, -1])]; + tensor var_6661_cast_fp16 = reshape(shape = var_6660, x = v_307_internal_tensor_assign_1_cast_fp16)[name = string("op_6661_cast_fp16")]; + tensor var_6662 = const()[name = string("op_6662"), val = tensor([0, 2, 1, 3])]; + bool qk_185_transpose_x_0 = const()[name = string("qk_185_transpose_x_0"), val = bool(false)]; + bool qk_185_transpose_y_0 = const()[name = string("qk_185_transpose_y_0"), val = bool(false)]; + tensor transpose_379_perm_0 = const()[name = string("transpose_379_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_380_perm_0 = const()[name = string("transpose_380_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_380 = transpose(perm = transpose_380_perm_0, x = k_309_cast_fp16)[name = string("transpose_394")]; + tensor transpose_379 = transpose(perm = transpose_379_perm_0, x = q_247_cast_fp16)[name = string("transpose_395")]; + tensor qk_185_cast_fp16 = matmul(transpose_x = qk_185_transpose_x_0, transpose_y = qk_185_transpose_y_0, x = transpose_379, y = transpose_380)[name = string("qk_185_cast_fp16")]; + tensor var_6666_cast_fp16 = softmax(axis = var_6510, x = qk_185_cast_fp16)[name = string("op_6666_cast_fp16")]; + bool var_6668_transpose_x_0 = const()[name = string("op_6668_transpose_x_0"), val = bool(false)]; + bool var_6668_transpose_y_0 = const()[name = string("op_6668_transpose_y_0"), val = bool(false)]; + tensor v_309_cast_fp16 = transpose(perm = var_6662, x = var_6661_cast_fp16)[name = string("transpose_396")]; + tensor var_6668_cast_fp16 = matmul(transpose_x = var_6668_transpose_x_0, transpose_y = var_6668_transpose_y_0, x = var_6666_cast_fp16, y = v_309_cast_fp16)[name = string("op_6668_cast_fp16")]; + tensor var_6669 = const()[name = string("op_6669"), val = tensor([0, 2, 1, 3])]; + tensor concat_683x = const()[name = string("concat_683x"), val = tensor([1, -1, 1280])]; + tensor var_6670_cast_fp16 = transpose(perm = var_6669, x = var_6668_cast_fp16)[name = string("transpose_393")]; + tensor x_553_cast_fp16 = reshape(shape = concat_683x, x = var_6670_cast_fp16)[name = string("x_553_cast_fp16")]; + tensor var_6674_to_fp16 = const()[name = string("op_6674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1532098624)))]; + tensor var_6675_to_fp16 = const()[name = string("op_6675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535375488)))]; + tensor linear_245_cast_fp16 = linear(bias = var_6675_to_fp16, weight = var_6674_to_fp16, x = x_553_cast_fp16)[name = string("linear_245_cast_fp16")]; + tensor x_555_cast_fp16 = add(x = x_549_cast_fp16, y = linear_245_cast_fp16)[name = string("x_555_cast_fp16")]; + tensor var_6682_axes_0 = const()[name = string("op_6682_axes_0"), val = tensor([-1])]; + tensor blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535378112)))]; + tensor blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535380736)))]; + tensor var_6682_cast_fp16 = layer_norm(axes = var_6682_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_6516_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_555_cast_fp16)[name = string("op_6682_cast_fp16")]; + tensor var_6691_to_fp16 = const()[name = string("op_6691_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1535383360)))]; + tensor var_6692_to_fp16 = const()[name = string("op_6692_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548490624)))]; + tensor linear_246_cast_fp16 = linear(bias = var_6692_to_fp16, weight = var_6691_to_fp16, x = var_6682_cast_fp16)[name = string("linear_246_cast_fp16")]; + string x_559_mode_0 = const()[name = string("x_559_mode_0"), val = string("EXACT")]; + tensor x_559_cast_fp16 = gelu(mode = x_559_mode_0, x = linear_246_cast_fp16)[name = string("x_559_cast_fp16")]; + tensor var_6697_to_fp16 = const()[name = string("op_6697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1548500928)))]; + tensor var_6698_to_fp16 = const()[name = string("op_6698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561608192)))]; + tensor linear_247_cast_fp16 = linear(bias = var_6698_to_fp16, weight = var_6697_to_fp16, x = x_559_cast_fp16)[name = string("linear_247_cast_fp16")]; + tensor x_561_cast_fp16 = add(x = x_555_cast_fp16, y = linear_247_cast_fp16)[name = string("x_561_cast_fp16")]; + tensor k_cache_125_begin_0 = const()[name = string("k_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor k_cache_125_end_0 = const()[name = string("k_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; + tensor k_cache_125_end_mask_0 = const()[name = string("k_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_125_squeeze_mask_0 = const()[name = string("k_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_125_cast_fp16 = slice_by_index(begin = k_cache_125_begin_0, end = k_cache_125_end_0, end_mask = k_cache_125_end_mask_0, squeeze_mask = k_cache_125_squeeze_mask_0, x = coreml_update_state_124)[name = string("k_cache_125_cast_fp16")]; + tensor v_cache_125_begin_0 = const()[name = string("v_cache_125_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor v_cache_125_end_0 = const()[name = string("v_cache_125_end_0"), val = tensor([32, 1, 448, 1280])]; + tensor v_cache_125_end_mask_0 = const()[name = string("v_cache_125_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_125_squeeze_mask_0 = const()[name = string("v_cache_125_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_125_cast_fp16 = slice_by_index(begin = v_cache_125_begin_0, end = v_cache_125_end_0, end_mask = v_cache_125_end_mask_0, squeeze_mask = v_cache_125_squeeze_mask_0, x = coreml_update_state_125)[name = string("v_cache_125_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([31, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([32, 1, 1500, 1280])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_6721 = const()[name = string("op_6721"), val = int32(-1)]; + tensor var_6739_axes_0 = const()[name = string("op_6739_axes_0"), val = tensor([-1])]; + tensor blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561610816)))]; + tensor blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561613440)))]; + fp16 var_6727_to_fp16 = const()[name = string("op_6727_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6739_cast_fp16 = layer_norm(axes = var_6739_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_561_cast_fp16)[name = string("op_6739_cast_fp16")]; + tensor var_6750_to_fp16 = const()[name = string("op_6750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1561616064)))]; + tensor var_6751_to_fp16 = const()[name = string("op_6751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564892928)))]; + tensor linear_248_cast_fp16 = linear(bias = var_6751_to_fp16, weight = var_6750_to_fp16, x = var_6739_cast_fp16)[name = string("linear_248_cast_fp16")]; + tensor var_6754_to_fp16 = const()[name = string("op_6754_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1564895552)))]; + tensor linear_249_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_6754_to_fp16, x = var_6739_cast_fp16)[name = string("linear_249_cast_fp16")]; + tensor var_6758_to_fp16 = const()[name = string("op_6758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1568172416)))]; + tensor var_6759_to_fp16 = const()[name = string("op_6759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571449280)))]; + tensor linear_250_cast_fp16 = linear(bias = var_6759_to_fp16, weight = var_6758_to_fp16, x = var_6739_cast_fp16)[name = string("linear_250_cast_fp16")]; + tensor var_6761_shape_cast_fp16 = shape(x = linear_248_cast_fp16)[name = string("op_6761_shape_cast_fp16")]; + int32 gather_374_axis_0 = const()[name = string("gather_374_axis_0"), val = int32(0)]; + int32 gather_374_batch_dims_0 = const()[name = string("gather_374_batch_dims_0"), val = int32(0)]; + bool gather_374_validate_indices_0 = const()[name = string("gather_374_validate_indices_0"), val = bool(false)]; + string var_6761_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_6761_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_374_to_uint16 = const()[name = string("select_374_to_uint16"), val = uint16(1)]; + tensor var_6761_shape_cast_fp16_to_uint16 = cast(dtype = var_6761_shape_cast_fp16_to_uint16_dtype_0, x = var_6761_shape_cast_fp16)[name = string("cast_328")]; + uint16 gather_374_cast_uint16 = gather(axis = gather_374_axis_0, batch_dims = gather_374_batch_dims_0, indices = select_374_to_uint16, validate_indices = gather_374_validate_indices_0, x = var_6761_shape_cast_fp16_to_uint16)[name = string("gather_374_cast_uint16")]; + string gather_374_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_374_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_374_cast_uint16_to_int32 = cast(dtype = gather_374_cast_uint16_to_int32_dtype_0, x = gather_374_cast_uint16)[name = string("cast_327")]; + int32 end_step = add(x = offset, y = gather_374_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_496 = const()[name = string("expand_dims_496"), val = tensor([0])]; + tensor expand_dims_498 = const()[name = string("expand_dims_498"), val = tensor([0])]; + tensor expand_dims_499_axes_0 = const()[name = string("expand_dims_499_axes_0"), val = tensor([0])]; + tensor expand_dims_499 = expand_dims(axes = expand_dims_499_axes_0, x = end_step)[name = string("expand_dims_499")]; + tensor concat_686_values0_0 = const()[name = string("concat_686_values0_0"), val = tensor([31])]; + int32 concat_686_axis_0 = const()[name = string("concat_686_axis_0"), val = int32(0)]; + bool concat_686_interleave_0 = const()[name = string("concat_686_interleave_0"), val = bool(false)]; + tensor concat_686 = concat(axis = concat_686_axis_0, interleave = concat_686_interleave_0, values = (concat_686_values0_0, expand_dims_496, expand_dims_1, expand_dims_498))[name = string("concat_686")]; + tensor concat_687_values0_0 = const()[name = string("concat_687_values0_0"), val = tensor([0])]; + tensor concat_687_values1_0 = const()[name = string("concat_687_values1_0"), val = tensor([0])]; + tensor concat_687_values3_0 = const()[name = string("concat_687_values3_0"), val = tensor([0])]; + int32 concat_687_axis_0 = const()[name = string("concat_687_axis_0"), val = int32(0)]; + bool concat_687_interleave_0 = const()[name = string("concat_687_interleave_0"), val = bool(false)]; + tensor concat_687 = concat(axis = concat_687_axis_0, interleave = concat_687_interleave_0, values = (concat_687_values0_0, concat_687_values1_0, expand_dims_499, concat_687_values3_0))[name = string("concat_687")]; + tensor k_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = k_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = k_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_32_stride_0, update = linear_249_cast_fp16, x = coreml_update_state_124)[name = string("k_cache1_internal_tensor_assign_32_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_32_cast_fp16, input = k_cache1)[name = string("coreml_update_state_126_write_state")]; + tensor v_cache1_internal_tensor_assign_32_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_32_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_32_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_32_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_32_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_32_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_32_cast_fp16 = slice_update(begin = concat_686, begin_mask = v_cache1_internal_tensor_assign_32_begin_mask_0, end = concat_687, end_mask = v_cache1_internal_tensor_assign_32_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_32_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_32_stride_0, update = linear_250_cast_fp16, x = coreml_update_state_125)[name = string("v_cache1_internal_tensor_assign_32_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_32_cast_fp16, input = v_cache1)[name = string("coreml_update_state_127_write_state")]; + int32 concat_692_values0_0 = const()[name = string("concat_692_values0_0"), val = int32(1)]; + int32 concat_692_values2_0 = const()[name = string("concat_692_values2_0"), val = int32(1280)]; + int32 concat_692_axis_0 = const()[name = string("concat_692_axis_0"), val = int32(0)]; + bool concat_692_interleave_0 = const()[name = string("concat_692_interleave_0"), val = bool(false)]; + tensor concat_692 = concat(axis = concat_692_axis_0, interleave = concat_692_interleave_0, values = (concat_692_values0_0, end_step, concat_692_values2_0))[name = string("concat_692")]; + tensor var_6777_begin_0 = const()[name = string("op_6777_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6777_end_mask_0 = const()[name = string("op_6777_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6777_cast_fp16 = slice_by_index(begin = var_6777_begin_0, end = concat_692, end_mask = var_6777_end_mask_0, x = k_cache_125_cast_fp16)[name = string("op_6777_cast_fp16")]; + tensor var_6780_begin_0 = const()[name = string("op_6780_begin_0"), val = tensor([0, 0, 0])]; + tensor var_6780_end_mask_0 = const()[name = string("op_6780_end_mask_0"), val = tensor([true, false, true])]; + tensor var_6780_cast_fp16 = slice_by_index(begin = var_6780_begin_0, end = concat_692, end_mask = var_6780_end_mask_0, x = v_cache_125_cast_fp16)[name = string("op_6780_cast_fp16")]; + tensor concat_694x = const()[name = string("concat_694x"), val = tensor([1, -1, 20, 64])]; + tensor var_6790_cast_fp16 = reshape(shape = concat_694x, x = linear_248_cast_fp16)[name = string("op_6790_cast_fp16")]; + tensor const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_251_cast_fp16 = mul(x = var_6790_cast_fp16, y = const_284_to_fp16)[name = string("q_251_cast_fp16")]; + tensor concat_695x = const()[name = string("concat_695x"), val = tensor([1, -1, 20, 64])]; + tensor var_6797_cast_fp16 = reshape(shape = concat_695x, x = var_6777_cast_fp16)[name = string("op_6797_cast_fp16")]; + tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_315_cast_fp16 = mul(x = var_6797_cast_fp16, y = const_285_to_fp16)[name = string("k_315_cast_fp16")]; + tensor concat_696x = const()[name = string("concat_696x"), val = tensor([1, -1, 20, 64])]; + tensor var_6804_cast_fp16 = reshape(shape = concat_696x, x = var_6780_cast_fp16)[name = string("op_6804_cast_fp16")]; + tensor var_6805 = const()[name = string("op_6805"), val = tensor([0, 2, 1, 3])]; + bool qk_187_transpose_x_0 = const()[name = string("qk_187_transpose_x_0"), val = bool(false)]; + bool qk_187_transpose_y_0 = const()[name = string("qk_187_transpose_y_0"), val = bool(false)]; + tensor transpose_381_perm_0 = const()[name = string("transpose_381_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_382_perm_0 = const()[name = string("transpose_382_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_382 = transpose(perm = transpose_382_perm_0, x = k_315_cast_fp16)[name = string("transpose_390")]; + tensor transpose_381 = transpose(perm = transpose_381_perm_0, x = q_251_cast_fp16)[name = string("transpose_391")]; + tensor qk_187_cast_fp16 = matmul(transpose_x = qk_187_transpose_x_0, transpose_y = qk_187_transpose_y_0, x = transpose_381, y = transpose_382)[name = string("qk_187_cast_fp16")]; + int32 concat_697_values1_0 = const()[name = string("concat_697_values1_0"), val = int32(448)]; + int32 concat_697_axis_0 = const()[name = string("concat_697_axis_0"), val = int32(0)]; + bool concat_697_interleave_0 = const()[name = string("concat_697_interleave_0"), val = bool(false)]; + tensor concat_697 = concat(axis = concat_697_axis_0, interleave = concat_697_interleave_0, values = (gather_374_cast_uint16_to_int32, concat_697_values1_0))[name = string("concat_697")]; + tensor var_6808_begin_0 = const()[name = string("op_6808_begin_0"), val = tensor([0, 0])]; + tensor var_6808_end_mask_0 = const()[name = string("op_6808_end_mask_0"), val = tensor([false, true])]; + tensor var_6808_cast_fp16 = slice_by_index(begin = var_6808_begin_0, end = concat_697, end_mask = var_6808_end_mask_0, x = mask_to_fp16)[name = string("op_6808_cast_fp16")]; + int32 concat_698_values0_0 = const()[name = string("concat_698_values0_0"), val = int32(0)]; + int32 concat_698_axis_0 = const()[name = string("concat_698_axis_0"), val = int32(0)]; + bool concat_698_interleave_0 = const()[name = string("concat_698_interleave_0"), val = bool(false)]; + tensor concat_698 = concat(axis = concat_698_axis_0, interleave = concat_698_interleave_0, values = (concat_698_values0_0, gather_374_cast_uint16_to_int32))[name = string("concat_698")]; + tensor var_6809_begin_0 = const()[name = string("op_6809_begin_0"), val = tensor([0, 0])]; + tensor var_6809_end_mask_0 = const()[name = string("op_6809_end_mask_0"), val = tensor([true, false])]; + tensor var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = concat_698, end_mask = var_6809_end_mask_0, x = var_6808_cast_fp16)[name = string("op_6809_cast_fp16")]; + tensor qk_189_cast_fp16 = add(x = qk_187_cast_fp16, y = var_6809_cast_fp16)[name = string("qk_189_cast_fp16")]; + tensor var_6812_cast_fp16 = softmax(axis = var_6721, x = qk_189_cast_fp16)[name = string("op_6812_cast_fp16")]; + bool var_6814_transpose_x_0 = const()[name = string("op_6814_transpose_x_0"), val = bool(false)]; + bool var_6814_transpose_y_0 = const()[name = string("op_6814_transpose_y_0"), val = bool(false)]; + tensor v_315_cast_fp16 = transpose(perm = var_6805, x = var_6804_cast_fp16)[name = string("transpose_392")]; + tensor var_6814_cast_fp16 = matmul(transpose_x = var_6814_transpose_x_0, transpose_y = var_6814_transpose_y_0, x = var_6812_cast_fp16, y = v_315_cast_fp16)[name = string("op_6814_cast_fp16")]; + tensor var_6815 = const()[name = string("op_6815"), val = tensor([0, 2, 1, 3])]; + tensor concat_699x = const()[name = string("concat_699x"), val = tensor([1, -1, 1280])]; + tensor var_6816_cast_fp16 = transpose(perm = var_6815, x = var_6814_cast_fp16)[name = string("transpose_389")]; + tensor x_565_cast_fp16 = reshape(shape = concat_699x, x = var_6816_cast_fp16)[name = string("x_565_cast_fp16")]; + tensor var_6820_to_fp16 = const()[name = string("op_6820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1571451904)))]; + tensor var_6821_to_fp16 = const()[name = string("op_6821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574728768)))]; + tensor linear_251_cast_fp16 = linear(bias = var_6821_to_fp16, weight = var_6820_to_fp16, x = x_565_cast_fp16)[name = string("linear_251_cast_fp16")]; + tensor x_567_cast_fp16 = add(x = x_561_cast_fp16, y = linear_251_cast_fp16)[name = string("x_567_cast_fp16")]; + tensor var_6828_axes_0 = const()[name = string("op_6828_axes_0"), val = tensor([-1])]; + tensor blocks_31_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574731392)))]; + tensor blocks_31_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574734016)))]; + tensor var_6828_cast_fp16 = layer_norm(axes = var_6828_axes_0, beta = blocks_31_cross_attn_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_cross_attn_ln_weight_to_fp16, x = x_567_cast_fp16)[name = string("op_6828_cast_fp16")]; + tensor var_6837_to_fp16 = const()[name = string("op_6837_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1574736640)))]; + tensor var_6838_to_fp16 = const()[name = string("op_6838_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578013504)))]; + tensor linear_252_cast_fp16 = linear(bias = var_6838_to_fp16, weight = var_6837_to_fp16, x = var_6828_cast_fp16)[name = string("linear_252_cast_fp16")]; + tensor concat_700 = const()[name = string("concat_700"), val = tensor([0, 0, 0])]; + tensor concat_701 = const()[name = string("concat_701"), val = tensor([0, 1500, 0])]; + tensor k_317_internal_tensor_assign_1_stride_0 = const()[name = string("k_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_700, begin_mask = k_317_internal_tensor_assign_1_begin_mask_0, end = concat_701, end_mask = k_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_317_internal_tensor_assign_1_squeeze_mask_0, stride = k_317_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_317_internal_tensor_assign_1_cast_fp16")]; + tensor concat_702 = const()[name = string("concat_702"), val = tensor([0, 0, 0])]; + tensor concat_703 = const()[name = string("concat_703"), val = tensor([0, 1500, 0])]; + tensor v_317_internal_tensor_assign_1_stride_0 = const()[name = string("v_317_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_317_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_317_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_317_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_317_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_317_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_702, begin_mask = v_317_internal_tensor_assign_1_begin_mask_0, end = concat_703, end_mask = v_317_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_317_internal_tensor_assign_1_squeeze_mask_0, stride = v_317_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_317_internal_tensor_assign_1_cast_fp16")]; + tensor concat_704x = const()[name = string("concat_704x"), val = tensor([1, -1, 20, 64])]; + tensor var_6858_cast_fp16 = reshape(shape = concat_704x, x = linear_252_cast_fp16)[name = string("op_6858_cast_fp16")]; + tensor const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_6858_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")]; + tensor var_6864 = const()[name = string("op_6864"), val = tensor([1, 1500, 20, -1])]; + tensor var_6865_cast_fp16 = reshape(shape = var_6864, x = k_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6865_cast_fp16")]; + tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_6865_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")]; + tensor var_6871 = const()[name = string("op_6871"), val = tensor([1, 1500, 20, -1])]; + tensor var_6872_cast_fp16 = reshape(shape = var_6871, x = v_317_internal_tensor_assign_1_cast_fp16)[name = string("op_6872_cast_fp16")]; + tensor var_6873 = const()[name = string("op_6873"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_383_perm_0 = const()[name = string("transpose_383_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_384_perm_0 = const()[name = string("transpose_384_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_384 = transpose(perm = transpose_384_perm_0, x = k_cast_fp16)[name = string("transpose_386")]; + tensor transpose_383 = transpose(perm = transpose_383_perm_0, x = q_cast_fp16)[name = string("transpose_387")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_383, y = transpose_384)[name = string("qk_cast_fp16")]; + tensor var_6877_cast_fp16 = softmax(axis = var_6721, x = qk_cast_fp16)[name = string("op_6877_cast_fp16")]; + bool var_6879_transpose_x_0 = const()[name = string("op_6879_transpose_x_0"), val = bool(false)]; + bool var_6879_transpose_y_0 = const()[name = string("op_6879_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_6873, x = var_6872_cast_fp16)[name = string("transpose_388")]; + tensor var_6879_cast_fp16 = matmul(transpose_x = var_6879_transpose_x_0, transpose_y = var_6879_transpose_y_0, x = var_6877_cast_fp16, y = v_cast_fp16)[name = string("op_6879_cast_fp16")]; + tensor var_6880 = const()[name = string("op_6880"), val = tensor([0, 2, 1, 3])]; + tensor concat_705x = const()[name = string("concat_705x"), val = tensor([1, -1, 1280])]; + tensor var_6881_cast_fp16 = transpose(perm = var_6880, x = var_6879_cast_fp16)[name = string("transpose_385")]; + tensor x_571_cast_fp16 = reshape(shape = concat_705x, x = var_6881_cast_fp16)[name = string("x_571_cast_fp16")]; + tensor var_6885_to_fp16 = const()[name = string("op_6885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578016128)))]; + tensor var_6886_to_fp16 = const()[name = string("op_6886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581292992)))]; + tensor linear_253_cast_fp16 = linear(bias = var_6886_to_fp16, weight = var_6885_to_fp16, x = x_571_cast_fp16)[name = string("linear_253_cast_fp16")]; + tensor x_573_cast_fp16 = add(x = x_567_cast_fp16, y = linear_253_cast_fp16)[name = string("x_573_cast_fp16")]; + tensor var_6893_axes_0 = const()[name = string("op_6893_axes_0"), val = tensor([-1])]; + tensor blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581295616)))]; + tensor blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581298240)))]; + tensor var_6893_cast_fp16 = layer_norm(axes = var_6893_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_6727_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_573_cast_fp16)[name = string("op_6893_cast_fp16")]; + tensor var_6902_to_fp16 = const()[name = string("op_6902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1581300864)))]; + tensor var_6903_to_fp16 = const()[name = string("op_6903_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594408128)))]; + tensor linear_254_cast_fp16 = linear(bias = var_6903_to_fp16, weight = var_6902_to_fp16, x = var_6893_cast_fp16)[name = string("linear_254_cast_fp16")]; + string x_577_mode_0 = const()[name = string("x_577_mode_0"), val = string("EXACT")]; + tensor x_577_cast_fp16 = gelu(mode = x_577_mode_0, x = linear_254_cast_fp16)[name = string("x_577_cast_fp16")]; + tensor var_6908_to_fp16 = const()[name = string("op_6908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1594418432)))]; + tensor var_6909_to_fp16 = const()[name = string("op_6909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607525696)))]; + tensor linear_255_cast_fp16 = linear(bias = var_6909_to_fp16, weight = var_6908_to_fp16, x = x_577_cast_fp16)[name = string("linear_255_cast_fp16")]; + tensor x_579_cast_fp16 = add(x = x_573_cast_fp16, y = linear_255_cast_fp16)[name = string("x_579_cast_fp16")]; + tensor var_6922_axes_0 = const()[name = string("op_6922_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607528320)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607530944)))]; + fp16 var_6913_to_fp16 = const()[name = string("op_6913_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_6922_cast_fp16 = layer_norm(axes = var_6922_axes_0, beta = ln_bias_to_fp16, epsilon = var_6913_to_fp16, gamma = ln_weight_to_fp16, x = x_579_cast_fp16)[name = string("op_6922_cast_fp16")]; + tensor var_6932_bias_0_to_fp16 = const()[name = string("op_6932_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1607533568)))]; + tensor logits = linear(bias = var_6932_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_6922_cast_fp16)[name = string("op_6932_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/large-v3/decoder_second.mlmodelc/weights/weight.bin b/large-v3/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c5a4ed8a3f05219c651d7ab76589c894c2dd42d --- /dev/null +++ b/large-v3/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a757620673f9ff3498457f742697cea3fbe6ad2754099f72ef6f0151ca0314 +size 1607637364 diff --git a/large-v3/encoder.mlmodelc/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..41ca7472382cf724a64670ac17a1afc902f53aa7 --- /dev/null +++ b/large-v3/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6182464ab69572de2980864c2fd4edc10b4b269f5fb25f0cbf5e22a86d36abc6 +size 202 diff --git a/large-v3/encoder.mlmodelc/coremldata.bin b/large-v3/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..865e1780dfe74afcc6790112d691462f020d0651 --- /dev/null +++ b/large-v3/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46dc321dd0ff6005125dc0365c3e0ecb2413f838328888df48578af4d2869749 +size 197 diff --git a/large-v3/encoder.mlmodelc/metadata.json b/large-v3/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..09266292ff38dc28253acec8329c16abfc1c410a --- /dev/null +++ b/large-v3/encoder.mlmodelc/metadata.json @@ -0,0 +1,76 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 1280)", + "shortDescription" : "", + "shape" : "[1, 1500, 1280]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.add" : 65, + "Ios18.reshape" : 128, + "Ios18.linear" : 192, + "Ios18.gelu" : 34, + "Ios18.matmul" : 64, + "Ios18.transpose" : 129, + "Ios18.layerNorm" : 65, + "Ios18.conv" : 2, + "Ios18.cast" : 4, + "Ios18.softmax" : 32, + "Ios18.mul" : 64 + }, + "computePrecision" : "Mixed (Float16, Float32, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_pipeline", + "structure" : [ + { + "name" : "MLModelType_mlProgram" + }, + { + "name" : "MLModelType_mlProgram" + } + ] + }, + "userDefinedMetadata" : { + + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 128 × 3000)", + "shortDescription" : "", + "shape" : "[1, 128, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "chunked_pipeline", + "method" : "predict" + } +] \ No newline at end of file diff --git a/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model0/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a +size 108 diff --git a/large-v3/encoder.mlmodelc/model0/coremldata.bin b/large-v3/encoder.mlmodelc/model0/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b0d1904141cd0a6f720a7b6752d01959f5b3081 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model0/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b0461e225831cc34e0017a300f867929784559e2ee471f01ddfd3452381076 +size 201 diff --git a/large-v3/encoder.mlmodelc/model0/model.mil b/large-v3/encoder.mlmodelc/model0/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..af87fe33cb70a8c4f508e264a7682accb9cec40a --- /dev/null +++ b/large-v3/encoder.mlmodelc/model0/model.mil @@ -0,0 +1,962 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor logmel_data) { + string var_84_pad_type_0 = const()[name = string("op_84_pad_type_0"), val = string("custom")]; + tensor var_84_pad_0 = const()[name = string("op_84_pad_0"), val = tensor([1, 1])]; + tensor var_84_strides_0 = const()[name = string("op_84_strides_0"), val = tensor([1])]; + tensor var_84_dilations_0 = const()[name = string("op_84_dilations_0"), val = tensor([1])]; + int32 var_84_groups_0 = const()[name = string("op_84_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(983168)))]; + tensor var_84_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_84_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_102_pad_type_0 = const()[name = string("op_102_pad_type_0"), val = string("custom")]; + tensor var_102_pad_0 = const()[name = string("op_102_pad_0"), val = tensor([1, 1])]; + tensor var_102_strides_0 = const()[name = string("op_102_strides_0"), val = tensor([2])]; + tensor var_102_dilations_0 = const()[name = string("op_102_dilations_0"), val = tensor([1])]; + int32 var_102_groups_0 = const()[name = string("op_102_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(985792)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10816256)))]; + tensor var_102_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_102_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_108 = const()[name = string("op_108"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(10818880)))]; + tensor x_5_cast_fp16 = transpose(perm = var_108, x = x_3_cast_fp16)[name = string("transpose_160")]; + tensor var_111_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_111_cast_fp16")]; + int32 var_124 = const()[name = string("op_124"), val = int32(-1)]; + tensor var_140_axes_0 = const()[name = string("op_140_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14658944)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14661568)))]; + fp16 var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_140_cast_fp16 = layer_norm(axes = var_140_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_111_cast_fp16)[name = string("op_140_cast_fp16")]; + tensor var_151_to_fp16 = const()[name = string("op_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(14664192)))]; + tensor var_152_to_fp16 = const()[name = string("op_152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17941056)))]; + tensor linear_0_cast_fp16 = linear(bias = var_152_to_fp16, weight = var_151_to_fp16, x = var_140_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(17943680)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(21220544)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_155_to_fp16, x = var_140_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(21223168)))]; + tensor var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24500032)))]; + tensor linear_2_cast_fp16 = linear(bias = var_160_to_fp16, weight = var_159_to_fp16, x = var_140_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_168 = const()[name = string("op_168"), val = tensor([1, 1500, 20, -1])]; + tensor var_169_cast_fp16 = reshape(shape = var_168, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")]; + tensor const_224_to_fp16 = const()[name = string("const_224_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_224_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_175 = const()[name = string("op_175"), val = tensor([1, 1500, 20, -1])]; + tensor var_176_cast_fp16 = reshape(shape = var_175, x = linear_1_cast_fp16)[name = string("op_176_cast_fp16")]; + tensor const_225_to_fp16 = const()[name = string("const_225_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_176_cast_fp16, y = const_225_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_182 = const()[name = string("op_182"), val = tensor([1, 1500, 20, -1])]; + tensor var_183_cast_fp16 = reshape(shape = var_182, x = linear_2_cast_fp16)[name = string("op_183_cast_fp16")]; + tensor var_184 = const()[name = string("op_184"), val = tensor([0, 2, -3, -1])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = k_3_cast_fp16)[name = string("transpose_158")]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = q_3_cast_fp16)[name = string("transpose_159")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_1_cast_fp16")]; + tensor var_188_cast_fp16 = softmax(axis = var_124, x = qk_1_cast_fp16)[name = string("op_188_cast_fp16")]; + bool var_190_transpose_x_0 = const()[name = string("op_190_transpose_x_0"), val = bool(false)]; + bool var_190_transpose_y_0 = const()[name = string("op_190_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_157")]; + tensor var_190_cast_fp16 = matmul(transpose_x = var_190_transpose_x_0, transpose_y = var_190_transpose_y_0, x = var_188_cast_fp16, y = v_3_cast_fp16)[name = string("op_190_cast_fp16")]; + tensor var_191 = const()[name = string("op_191"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 1280])]; + tensor var_192_cast_fp16 = transpose(perm = var_191, x = var_190_cast_fp16)[name = string("transpose_156")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_192_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(24502656)))]; + tensor var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27779520)))]; + tensor linear_3_cast_fp16 = linear(bias = var_197_to_fp16, weight = var_196_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_111_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_204_axes_0 = const()[name = string("op_204_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27782144)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27784768)))]; + tensor var_204_cast_fp16 = layer_norm(axes = var_204_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_130_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor var_213_to_fp16 = const()[name = string("op_213_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(27787392)))]; + tensor var_214_to_fp16 = const()[name = string("op_214_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40894656)))]; + tensor linear_4_cast_fp16 = linear(bias = var_214_to_fp16, weight = var_213_to_fp16, x = var_204_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(40904960)))]; + tensor var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54012224)))]; + tensor linear_5_cast_fp16 = linear(bias = var_220_to_fp16, weight = var_219_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_230 = const()[name = string("op_230"), val = int32(-1)]; + tensor var_246_axes_0 = const()[name = string("op_246_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54014848)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54017472)))]; + fp16 var_236_to_fp16 = const()[name = string("op_236_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_246_cast_fp16 = layer_norm(axes = var_246_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_246_cast_fp16")]; + tensor var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(54020096)))]; + tensor var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(57296960)))]; + tensor linear_6_cast_fp16 = linear(bias = var_258_to_fp16, weight = var_257_to_fp16, x = var_246_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(57299584)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_261_to_fp16, x = var_246_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(60576448)))]; + tensor var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63853312)))]; + tensor linear_8_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = var_246_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_274 = const()[name = string("op_274"), val = tensor([1, 1500, 20, -1])]; + tensor var_275_cast_fp16 = reshape(shape = var_274, x = linear_6_cast_fp16)[name = string("op_275_cast_fp16")]; + tensor const_226_to_fp16 = const()[name = string("const_226_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_275_cast_fp16, y = const_226_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_281 = const()[name = string("op_281"), val = tensor([1, 1500, 20, -1])]; + tensor var_282_cast_fp16 = reshape(shape = var_281, x = linear_7_cast_fp16)[name = string("op_282_cast_fp16")]; + tensor const_227_to_fp16 = const()[name = string("const_227_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_282_cast_fp16, y = const_227_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_288 = const()[name = string("op_288"), val = tensor([1, 1500, 20, -1])]; + tensor var_289_cast_fp16 = reshape(shape = var_288, x = linear_8_cast_fp16)[name = string("op_289_cast_fp16")]; + tensor var_290 = const()[name = string("op_290"), val = tensor([0, 2, -3, -1])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = k_7_cast_fp16)[name = string("transpose_154")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = q_7_cast_fp16)[name = string("transpose_155")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_3_cast_fp16")]; + tensor var_294_cast_fp16 = softmax(axis = var_230, x = qk_3_cast_fp16)[name = string("op_294_cast_fp16")]; + bool var_296_transpose_x_0 = const()[name = string("op_296_transpose_x_0"), val = bool(false)]; + bool var_296_transpose_y_0 = const()[name = string("op_296_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_290, x = var_289_cast_fp16)[name = string("transpose_153")]; + tensor var_296_cast_fp16 = matmul(transpose_x = var_296_transpose_x_0, transpose_y = var_296_transpose_y_0, x = var_294_cast_fp16, y = v_7_cast_fp16)[name = string("op_296_cast_fp16")]; + tensor var_297 = const()[name = string("op_297"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 1280])]; + tensor var_298_cast_fp16 = transpose(perm = var_297, x = var_296_cast_fp16)[name = string("transpose_152")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_298_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(63855936)))]; + tensor var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67132800)))]; + tensor linear_9_cast_fp16 = linear(bias = var_303_to_fp16, weight = var_302_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_310_axes_0 = const()[name = string("op_310_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67135424)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67138048)))]; + tensor var_310_cast_fp16 = layer_norm(axes = var_310_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_236_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_310_cast_fp16")]; + tensor var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(67140672)))]; + tensor var_320_to_fp16 = const()[name = string("op_320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(80247936)))]; + tensor linear_10_cast_fp16 = linear(bias = var_320_to_fp16, weight = var_319_to_fp16, x = var_310_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_325_to_fp16 = const()[name = string("op_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(80258240)))]; + tensor var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93365504)))]; + tensor linear_11_cast_fp16 = linear(bias = var_326_to_fp16, weight = var_325_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_336 = const()[name = string("op_336"), val = int32(-1)]; + tensor var_352_axes_0 = const()[name = string("op_352_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93368128)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93370752)))]; + fp16 var_342_to_fp16 = const()[name = string("op_342_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_352_cast_fp16 = layer_norm(axes = var_352_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_352_cast_fp16")]; + tensor var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(93373376)))]; + tensor var_364_to_fp16 = const()[name = string("op_364_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96650240)))]; + tensor linear_12_cast_fp16 = linear(bias = var_364_to_fp16, weight = var_363_to_fp16, x = var_352_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(96652864)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_367_to_fp16, x = var_352_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_371_to_fp16 = const()[name = string("op_371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(99929728)))]; + tensor var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(103206592)))]; + tensor linear_14_cast_fp16 = linear(bias = var_372_to_fp16, weight = var_371_to_fp16, x = var_352_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_380 = const()[name = string("op_380"), val = tensor([1, 1500, 20, -1])]; + tensor var_381_cast_fp16 = reshape(shape = var_380, x = linear_12_cast_fp16)[name = string("op_381_cast_fp16")]; + tensor const_228_to_fp16 = const()[name = string("const_228_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_381_cast_fp16, y = const_228_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_387 = const()[name = string("op_387"), val = tensor([1, 1500, 20, -1])]; + tensor var_388_cast_fp16 = reshape(shape = var_387, x = linear_13_cast_fp16)[name = string("op_388_cast_fp16")]; + tensor const_229_to_fp16 = const()[name = string("const_229_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_388_cast_fp16, y = const_229_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_394 = const()[name = string("op_394"), val = tensor([1, 1500, 20, -1])]; + tensor var_395_cast_fp16 = reshape(shape = var_394, x = linear_14_cast_fp16)[name = string("op_395_cast_fp16")]; + tensor var_396 = const()[name = string("op_396"), val = tensor([0, 2, -3, -1])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = k_11_cast_fp16)[name = string("transpose_150")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = q_11_cast_fp16)[name = string("transpose_151")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_5_cast_fp16")]; + tensor var_400_cast_fp16 = softmax(axis = var_336, x = qk_5_cast_fp16)[name = string("op_400_cast_fp16")]; + bool var_402_transpose_x_0 = const()[name = string("op_402_transpose_x_0"), val = bool(false)]; + bool var_402_transpose_y_0 = const()[name = string("op_402_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_396, x = var_395_cast_fp16)[name = string("transpose_149")]; + tensor var_402_cast_fp16 = matmul(transpose_x = var_402_transpose_x_0, transpose_y = var_402_transpose_y_0, x = var_400_cast_fp16, y = v_11_cast_fp16)[name = string("op_402_cast_fp16")]; + tensor var_403 = const()[name = string("op_403"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 1280])]; + tensor var_404_cast_fp16 = transpose(perm = var_403, x = var_402_cast_fp16)[name = string("transpose_148")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_404_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(103209216)))]; + tensor var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106486080)))]; + tensor linear_15_cast_fp16 = linear(bias = var_409_to_fp16, weight = var_408_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_416_axes_0 = const()[name = string("op_416_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106488704)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106491328)))]; + tensor var_416_cast_fp16 = layer_norm(axes = var_416_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_342_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_416_cast_fp16")]; + tensor var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(106493952)))]; + tensor var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119601216)))]; + tensor linear_16_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = var_416_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(119611520)))]; + tensor var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132718784)))]; + tensor linear_17_cast_fp16 = linear(bias = var_432_to_fp16, weight = var_431_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_442 = const()[name = string("op_442"), val = int32(-1)]; + tensor var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132721408)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132724032)))]; + fp16 var_448_to_fp16 = const()[name = string("op_448_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_458_cast_fp16 = layer_norm(axes = var_458_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_458_cast_fp16")]; + tensor var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(132726656)))]; + tensor var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(136003520)))]; + tensor linear_18_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = var_458_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_473_to_fp16 = const()[name = string("op_473_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(136006144)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_473_to_fp16, x = var_458_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(139283008)))]; + tensor var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142559872)))]; + tensor linear_20_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = var_458_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_486 = const()[name = string("op_486"), val = tensor([1, 1500, 20, -1])]; + tensor var_487_cast_fp16 = reshape(shape = var_486, x = linear_18_cast_fp16)[name = string("op_487_cast_fp16")]; + tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_487_cast_fp16, y = const_230_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_493 = const()[name = string("op_493"), val = tensor([1, 1500, 20, -1])]; + tensor var_494_cast_fp16 = reshape(shape = var_493, x = linear_19_cast_fp16)[name = string("op_494_cast_fp16")]; + tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_494_cast_fp16, y = const_231_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_500 = const()[name = string("op_500"), val = tensor([1, 1500, 20, -1])]; + tensor var_501_cast_fp16 = reshape(shape = var_500, x = linear_20_cast_fp16)[name = string("op_501_cast_fp16")]; + tensor var_502 = const()[name = string("op_502"), val = tensor([0, 2, -3, -1])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = k_15_cast_fp16)[name = string("transpose_146")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = q_15_cast_fp16)[name = string("transpose_147")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_7_cast_fp16")]; + tensor var_506_cast_fp16 = softmax(axis = var_442, x = qk_7_cast_fp16)[name = string("op_506_cast_fp16")]; + bool var_508_transpose_x_0 = const()[name = string("op_508_transpose_x_0"), val = bool(false)]; + bool var_508_transpose_y_0 = const()[name = string("op_508_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = string("transpose_145")]; + tensor var_508_cast_fp16 = matmul(transpose_x = var_508_transpose_x_0, transpose_y = var_508_transpose_y_0, x = var_506_cast_fp16, y = v_15_cast_fp16)[name = string("op_508_cast_fp16")]; + tensor var_509 = const()[name = string("op_509"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 1280])]; + tensor var_510_cast_fp16 = transpose(perm = var_509, x = var_508_cast_fp16)[name = string("transpose_144")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_510_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(142562496)))]; + tensor var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145839360)))]; + tensor linear_21_cast_fp16 = linear(bias = var_515_to_fp16, weight = var_514_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_522_axes_0 = const()[name = string("op_522_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145841984)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145844608)))]; + tensor var_522_cast_fp16 = layer_norm(axes = var_522_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_448_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_522_cast_fp16")]; + tensor var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(145847232)))]; + tensor var_532_to_fp16 = const()[name = string("op_532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158954496)))]; + tensor linear_22_cast_fp16 = linear(bias = var_532_to_fp16, weight = var_531_to_fp16, x = var_522_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(158964800)))]; + tensor var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172072064)))]; + tensor linear_23_cast_fp16 = linear(bias = var_538_to_fp16, weight = var_537_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")]; + int32 var_548 = const()[name = string("op_548"), val = int32(-1)]; + tensor var_564_axes_0 = const()[name = string("op_564_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172074688)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172077312)))]; + fp16 var_554_to_fp16 = const()[name = string("op_554_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_564_cast_fp16 = layer_norm(axes = var_564_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_564_cast_fp16")]; + tensor var_575_to_fp16 = const()[name = string("op_575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(172079936)))]; + tensor var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(175356800)))]; + tensor linear_24_cast_fp16 = linear(bias = var_576_to_fp16, weight = var_575_to_fp16, x = var_564_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_579_to_fp16 = const()[name = string("op_579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(175359424)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_579_to_fp16, x = var_564_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_583_to_fp16 = const()[name = string("op_583_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(178636288)))]; + tensor var_584_to_fp16 = const()[name = string("op_584_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181913152)))]; + tensor linear_26_cast_fp16 = linear(bias = var_584_to_fp16, weight = var_583_to_fp16, x = var_564_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_592 = const()[name = string("op_592"), val = tensor([1, 1500, 20, -1])]; + tensor var_593_cast_fp16 = reshape(shape = var_592, x = linear_24_cast_fp16)[name = string("op_593_cast_fp16")]; + tensor const_232_to_fp16 = const()[name = string("const_232_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_593_cast_fp16, y = const_232_to_fp16)[name = string("q_19_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([1, 1500, 20, -1])]; + tensor var_600_cast_fp16 = reshape(shape = var_599, x = linear_25_cast_fp16)[name = string("op_600_cast_fp16")]; + tensor const_233_to_fp16 = const()[name = string("const_233_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_600_cast_fp16, y = const_233_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_606 = const()[name = string("op_606"), val = tensor([1, 1500, 20, -1])]; + tensor var_607_cast_fp16 = reshape(shape = var_606, x = linear_26_cast_fp16)[name = string("op_607_cast_fp16")]; + tensor var_608 = const()[name = string("op_608"), val = tensor([0, 2, -3, -1])]; + bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)]; + bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = k_19_cast_fp16)[name = string("transpose_142")]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = q_19_cast_fp16)[name = string("transpose_143")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_9_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_548, x = qk_9_cast_fp16)[name = string("op_612_cast_fp16")]; + bool var_614_transpose_x_0 = const()[name = string("op_614_transpose_x_0"), val = bool(false)]; + bool var_614_transpose_y_0 = const()[name = string("op_614_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_608, x = var_607_cast_fp16)[name = string("transpose_141")]; + tensor var_614_cast_fp16 = matmul(transpose_x = var_614_transpose_x_0, transpose_y = var_614_transpose_y_0, x = var_612_cast_fp16, y = v_19_cast_fp16)[name = string("op_614_cast_fp16")]; + tensor var_615 = const()[name = string("op_615"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 1500, 1280])]; + tensor var_616_cast_fp16 = transpose(perm = var_615, x = var_614_cast_fp16)[name = string("transpose_140")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = var_616_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(181915776)))]; + tensor var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185192640)))]; + tensor linear_27_cast_fp16 = linear(bias = var_621_to_fp16, weight = var_620_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_628_axes_0 = const()[name = string("op_628_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185195264)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185197888)))]; + tensor var_628_cast_fp16 = layer_norm(axes = var_628_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_554_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_628_cast_fp16")]; + tensor var_637_to_fp16 = const()[name = string("op_637_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(185200512)))]; + tensor var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(198307776)))]; + tensor linear_28_cast_fp16 = linear(bias = var_638_to_fp16, weight = var_637_to_fp16, x = var_628_cast_fp16)[name = string("linear_28_cast_fp16")]; + string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")]; + tensor var_643_to_fp16 = const()[name = string("op_643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(198318080)))]; + tensor var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211425344)))]; + tensor linear_29_cast_fp16 = linear(bias = var_644_to_fp16, weight = var_643_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")]; + int32 var_654 = const()[name = string("op_654"), val = int32(-1)]; + tensor var_670_axes_0 = const()[name = string("op_670_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211427968)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211430592)))]; + fp16 var_660_to_fp16 = const()[name = string("op_660_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_670_cast_fp16 = layer_norm(axes = var_670_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_670_cast_fp16")]; + tensor var_681_to_fp16 = const()[name = string("op_681_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(211433216)))]; + tensor var_682_to_fp16 = const()[name = string("op_682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214710080)))]; + tensor linear_30_cast_fp16 = linear(bias = var_682_to_fp16, weight = var_681_to_fp16, x = var_670_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor var_685_to_fp16 = const()[name = string("op_685_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(214712704)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_685_to_fp16, x = var_670_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor var_689_to_fp16 = const()[name = string("op_689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(217989568)))]; + tensor var_690_to_fp16 = const()[name = string("op_690_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(221266432)))]; + tensor linear_32_cast_fp16 = linear(bias = var_690_to_fp16, weight = var_689_to_fp16, x = var_670_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([1, 1500, 20, -1])]; + tensor var_699_cast_fp16 = reshape(shape = var_698, x = linear_30_cast_fp16)[name = string("op_699_cast_fp16")]; + tensor const_234_to_fp16 = const()[name = string("const_234_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_699_cast_fp16, y = const_234_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_705 = const()[name = string("op_705"), val = tensor([1, 1500, 20, -1])]; + tensor var_706_cast_fp16 = reshape(shape = var_705, x = linear_31_cast_fp16)[name = string("op_706_cast_fp16")]; + tensor const_235_to_fp16 = const()[name = string("const_235_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_23_cast_fp16 = mul(x = var_706_cast_fp16, y = const_235_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_712 = const()[name = string("op_712"), val = tensor([1, 1500, 20, -1])]; + tensor var_713_cast_fp16 = reshape(shape = var_712, x = linear_32_cast_fp16)[name = string("op_713_cast_fp16")]; + tensor var_714 = const()[name = string("op_714"), val = tensor([0, 2, -3, -1])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = k_23_cast_fp16)[name = string("transpose_138")]; + tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = q_23_cast_fp16)[name = string("transpose_139")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_11_cast_fp16")]; + tensor var_718_cast_fp16 = softmax(axis = var_654, x = qk_11_cast_fp16)[name = string("op_718_cast_fp16")]; + bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)]; + bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)]; + tensor v_23_cast_fp16 = transpose(perm = var_714, x = var_713_cast_fp16)[name = string("transpose_137")]; + tensor var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = var_718_cast_fp16, y = v_23_cast_fp16)[name = string("op_720_cast_fp16")]; + tensor var_721 = const()[name = string("op_721"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([1, 1500, 1280])]; + tensor var_722_cast_fp16 = transpose(perm = var_721, x = var_720_cast_fp16)[name = string("transpose_136")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = var_722_cast_fp16)[name = string("x_71_cast_fp16")]; + tensor var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(221269056)))]; + tensor var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224545920)))]; + tensor linear_33_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_734_axes_0 = const()[name = string("op_734_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224548544)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224551168)))]; + tensor var_734_cast_fp16 = layer_norm(axes = var_734_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_660_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_734_cast_fp16")]; + tensor var_743_to_fp16 = const()[name = string("op_743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(224553792)))]; + tensor var_744_to_fp16 = const()[name = string("op_744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237661056)))]; + tensor linear_34_cast_fp16 = linear(bias = var_744_to_fp16, weight = var_743_to_fp16, x = var_734_cast_fp16)[name = string("linear_34_cast_fp16")]; + string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")]; + tensor var_749_to_fp16 = const()[name = string("op_749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(237671360)))]; + tensor var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250778624)))]; + tensor linear_35_cast_fp16 = linear(bias = var_750_to_fp16, weight = var_749_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_760 = const()[name = string("op_760"), val = int32(-1)]; + tensor var_776_axes_0 = const()[name = string("op_776_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250781248)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250783872)))]; + fp16 var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_776_cast_fp16 = layer_norm(axes = var_776_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_776_cast_fp16")]; + tensor var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(250786496)))]; + tensor var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(254063360)))]; + tensor linear_36_cast_fp16 = linear(bias = var_788_to_fp16, weight = var_787_to_fp16, x = var_776_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor var_791_to_fp16 = const()[name = string("op_791_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(254065984)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_791_to_fp16, x = var_776_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(257342848)))]; + tensor var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260619712)))]; + tensor linear_38_cast_fp16 = linear(bias = var_796_to_fp16, weight = var_795_to_fp16, x = var_776_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor var_804 = const()[name = string("op_804"), val = tensor([1, 1500, 20, -1])]; + tensor var_805_cast_fp16 = reshape(shape = var_804, x = linear_36_cast_fp16)[name = string("op_805_cast_fp16")]; + tensor const_236_to_fp16 = const()[name = string("const_236_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_805_cast_fp16, y = const_236_to_fp16)[name = string("q_27_cast_fp16")]; + tensor var_811 = const()[name = string("op_811"), val = tensor([1, 1500, 20, -1])]; + tensor var_812_cast_fp16 = reshape(shape = var_811, x = linear_37_cast_fp16)[name = string("op_812_cast_fp16")]; + tensor const_237_to_fp16 = const()[name = string("const_237_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_27_cast_fp16 = mul(x = var_812_cast_fp16, y = const_237_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_818 = const()[name = string("op_818"), val = tensor([1, 1500, 20, -1])]; + tensor var_819_cast_fp16 = reshape(shape = var_818, x = linear_38_cast_fp16)[name = string("op_819_cast_fp16")]; + tensor var_820 = const()[name = string("op_820"), val = tensor([0, 2, -3, -1])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = k_27_cast_fp16)[name = string("transpose_134")]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = q_27_cast_fp16)[name = string("transpose_135")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_13_cast_fp16")]; + tensor var_824_cast_fp16 = softmax(axis = var_760, x = qk_13_cast_fp16)[name = string("op_824_cast_fp16")]; + bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)]; + bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)]; + tensor v_27_cast_fp16 = transpose(perm = var_820, x = var_819_cast_fp16)[name = string("transpose_133")]; + tensor var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_27_cast_fp16)[name = string("op_826_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([0, 2, 1, 3])]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([1, 1500, 1280])]; + tensor var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_132")]; + tensor x_83_cast_fp16 = reshape(shape = concat_6, x = var_828_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(260622336)))]; + tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263899200)))]; + tensor linear_39_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263901824)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263904448)))]; + tensor var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_766_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(263907072)))]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(277014336)))]; + tensor linear_40_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_40_cast_fp16")]; + string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")]; + tensor x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_855_to_fp16 = const()[name = string("op_855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(277024640)))]; + tensor var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290131904)))]; + tensor linear_41_cast_fp16 = linear(bias = var_856_to_fp16, weight = var_855_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")]; + int32 var_866 = const()[name = string("op_866"), val = int32(-1)]; + tensor var_882_axes_0 = const()[name = string("op_882_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290134528)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290137152)))]; + fp16 var_872_to_fp16 = const()[name = string("op_872_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_882_cast_fp16 = layer_norm(axes = var_882_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_882_cast_fp16")]; + tensor var_893_to_fp16 = const()[name = string("op_893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(290139776)))]; + tensor var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293416640)))]; + tensor linear_42_cast_fp16 = linear(bias = var_894_to_fp16, weight = var_893_to_fp16, x = var_882_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(293419264)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_897_to_fp16, x = var_882_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(296696128)))]; + tensor var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299972992)))]; + tensor linear_44_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = var_882_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor var_910 = const()[name = string("op_910"), val = tensor([1, 1500, 20, -1])]; + tensor var_911_cast_fp16 = reshape(shape = var_910, x = linear_42_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor const_238_to_fp16 = const()[name = string("const_238_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_911_cast_fp16, y = const_238_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_917 = const()[name = string("op_917"), val = tensor([1, 1500, 20, -1])]; + tensor var_918_cast_fp16 = reshape(shape = var_917, x = linear_43_cast_fp16)[name = string("op_918_cast_fp16")]; + tensor const_239_to_fp16 = const()[name = string("const_239_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_239_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([1, 1500, 20, -1])]; + tensor var_925_cast_fp16 = reshape(shape = var_924, x = linear_44_cast_fp16)[name = string("op_925_cast_fp16")]; + tensor var_926 = const()[name = string("op_926"), val = tensor([0, 2, -3, -1])]; + bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)]; + bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = k_31_cast_fp16)[name = string("transpose_130")]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = q_31_cast_fp16)[name = string("transpose_131")]; + tensor qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_15_cast_fp16")]; + tensor var_930_cast_fp16 = softmax(axis = var_866, x = qk_15_cast_fp16)[name = string("op_930_cast_fp16")]; + bool var_932_transpose_x_0 = const()[name = string("op_932_transpose_x_0"), val = bool(false)]; + bool var_932_transpose_y_0 = const()[name = string("op_932_transpose_y_0"), val = bool(false)]; + tensor v_31_cast_fp16 = transpose(perm = var_926, x = var_925_cast_fp16)[name = string("transpose_129")]; + tensor var_932_cast_fp16 = matmul(transpose_x = var_932_transpose_x_0, transpose_y = var_932_transpose_y_0, x = var_930_cast_fp16, y = v_31_cast_fp16)[name = string("op_932_cast_fp16")]; + tensor var_933 = const()[name = string("op_933"), val = tensor([0, 2, 1, 3])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([1, 1500, 1280])]; + tensor var_934_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_128")]; + tensor x_95_cast_fp16 = reshape(shape = concat_7, x = var_934_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(299975616)))]; + tensor var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303252480)))]; + tensor linear_45_cast_fp16 = linear(bias = var_939_to_fp16, weight = var_938_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_946_axes_0 = const()[name = string("op_946_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303255104)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303257728)))]; + tensor var_946_cast_fp16 = layer_norm(axes = var_946_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_872_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_946_cast_fp16")]; + tensor var_955_to_fp16 = const()[name = string("op_955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(303260352)))]; + tensor var_956_to_fp16 = const()[name = string("op_956_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316367616)))]; + tensor linear_46_cast_fp16 = linear(bias = var_956_to_fp16, weight = var_955_to_fp16, x = var_946_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")]; + tensor x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_961_to_fp16 = const()[name = string("op_961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(316377920)))]; + tensor var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329485184)))]; + tensor linear_47_cast_fp16 = linear(bias = var_962_to_fp16, weight = var_961_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")]; + int32 var_972 = const()[name = string("op_972"), val = int32(-1)]; + tensor var_988_axes_0 = const()[name = string("op_988_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329487808)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329490432)))]; + fp16 var_978_to_fp16 = const()[name = string("op_978_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_988_cast_fp16 = layer_norm(axes = var_988_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_988_cast_fp16")]; + tensor var_999_to_fp16 = const()[name = string("op_999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(329493056)))]; + tensor var_1000_to_fp16 = const()[name = string("op_1000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332769920)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1000_to_fp16, weight = var_999_to_fp16, x = var_988_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1003_to_fp16 = const()[name = string("op_1003_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(332772544)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1003_to_fp16, x = var_988_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1007_to_fp16 = const()[name = string("op_1007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(336049408)))]; + tensor var_1008_to_fp16 = const()[name = string("op_1008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(339326272)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1008_to_fp16, weight = var_1007_to_fp16, x = var_988_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([1, 1500, 20, -1])]; + tensor var_1017_cast_fp16 = reshape(shape = var_1016, x = linear_48_cast_fp16)[name = string("op_1017_cast_fp16")]; + tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1017_cast_fp16, y = const_240_to_fp16)[name = string("q_35_cast_fp16")]; + tensor var_1023 = const()[name = string("op_1023"), val = tensor([1, 1500, 20, -1])]; + tensor var_1024_cast_fp16 = reshape(shape = var_1023, x = linear_49_cast_fp16)[name = string("op_1024_cast_fp16")]; + tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_1024_cast_fp16, y = const_241_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_1030 = const()[name = string("op_1030"), val = tensor([1, 1500, 20, -1])]; + tensor var_1031_cast_fp16 = reshape(shape = var_1030, x = linear_50_cast_fp16)[name = string("op_1031_cast_fp16")]; + tensor var_1032 = const()[name = string("op_1032"), val = tensor([0, 2, -3, -1])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = k_35_cast_fp16)[name = string("transpose_126")]; + tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = q_35_cast_fp16)[name = string("transpose_127")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_17_cast_fp16")]; + tensor var_1036_cast_fp16 = softmax(axis = var_972, x = qk_17_cast_fp16)[name = string("op_1036_cast_fp16")]; + bool var_1038_transpose_x_0 = const()[name = string("op_1038_transpose_x_0"), val = bool(false)]; + bool var_1038_transpose_y_0 = const()[name = string("op_1038_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_1032, x = var_1031_cast_fp16)[name = string("transpose_125")]; + tensor var_1038_cast_fp16 = matmul(transpose_x = var_1038_transpose_x_0, transpose_y = var_1038_transpose_y_0, x = var_1036_cast_fp16, y = v_35_cast_fp16)[name = string("op_1038_cast_fp16")]; + tensor var_1039 = const()[name = string("op_1039"), val = tensor([0, 2, 1, 3])]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 1500, 1280])]; + tensor var_1040_cast_fp16 = transpose(perm = var_1039, x = var_1038_cast_fp16)[name = string("transpose_124")]; + tensor x_107_cast_fp16 = reshape(shape = concat_8, x = var_1040_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(339328896)))]; + tensor var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342605760)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1045_to_fp16, weight = var_1044_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1052_axes_0 = const()[name = string("op_1052_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342608384)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342611008)))]; + tensor var_1052_cast_fp16 = layer_norm(axes = var_1052_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_978_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1052_cast_fp16")]; + tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(342613632)))]; + tensor var_1062_to_fp16 = const()[name = string("op_1062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355720896)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1062_to_fp16, weight = var_1061_to_fp16, x = var_1052_cast_fp16)[name = string("linear_52_cast_fp16")]; + string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")]; + tensor x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")]; + tensor var_1067_to_fp16 = const()[name = string("op_1067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(355731200)))]; + tensor var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368838464)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1068_to_fp16, weight = var_1067_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")]; + int32 var_1078 = const()[name = string("op_1078"), val = int32(-1)]; + tensor var_1094_axes_0 = const()[name = string("op_1094_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368841088)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368843712)))]; + fp16 var_1084_to_fp16 = const()[name = string("op_1084_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1094_cast_fp16 = layer_norm(axes = var_1094_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1094_cast_fp16")]; + tensor var_1105_to_fp16 = const()[name = string("op_1105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(368846336)))]; + tensor var_1106_to_fp16 = const()[name = string("op_1106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(372123200)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1106_to_fp16, weight = var_1105_to_fp16, x = var_1094_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(372125824)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1109_to_fp16, x = var_1094_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor var_1113_to_fp16 = const()[name = string("op_1113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(375402688)))]; + tensor var_1114_to_fp16 = const()[name = string("op_1114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378679552)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1114_to_fp16, weight = var_1113_to_fp16, x = var_1094_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1122 = const()[name = string("op_1122"), val = tensor([1, 1500, 20, -1])]; + tensor var_1123_cast_fp16 = reshape(shape = var_1122, x = linear_54_cast_fp16)[name = string("op_1123_cast_fp16")]; + tensor const_242_to_fp16 = const()[name = string("const_242_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1123_cast_fp16, y = const_242_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([1, 1500, 20, -1])]; + tensor var_1130_cast_fp16 = reshape(shape = var_1129, x = linear_55_cast_fp16)[name = string("op_1130_cast_fp16")]; + tensor const_243_to_fp16 = const()[name = string("const_243_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_1130_cast_fp16, y = const_243_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_1136 = const()[name = string("op_1136"), val = tensor([1, 1500, 20, -1])]; + tensor var_1137_cast_fp16 = reshape(shape = var_1136, x = linear_56_cast_fp16)[name = string("op_1137_cast_fp16")]; + tensor var_1138 = const()[name = string("op_1138"), val = tensor([0, 2, -3, -1])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = k_39_cast_fp16)[name = string("transpose_122")]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = q_39_cast_fp16)[name = string("transpose_123")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_19_cast_fp16")]; + tensor var_1142_cast_fp16 = softmax(axis = var_1078, x = qk_19_cast_fp16)[name = string("op_1142_cast_fp16")]; + bool var_1144_transpose_x_0 = const()[name = string("op_1144_transpose_x_0"), val = bool(false)]; + bool var_1144_transpose_y_0 = const()[name = string("op_1144_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_1138, x = var_1137_cast_fp16)[name = string("transpose_121")]; + tensor var_1144_cast_fp16 = matmul(transpose_x = var_1144_transpose_x_0, transpose_y = var_1144_transpose_y_0, x = var_1142_cast_fp16, y = v_39_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor var_1145 = const()[name = string("op_1145"), val = tensor([0, 2, 1, 3])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([1, 1500, 1280])]; + tensor var_1146_cast_fp16 = transpose(perm = var_1145, x = var_1144_cast_fp16)[name = string("transpose_120")]; + tensor x_119_cast_fp16 = reshape(shape = concat_9, x = var_1146_cast_fp16)[name = string("x_119_cast_fp16")]; + tensor var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(378682176)))]; + tensor var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381959040)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1151_to_fp16, weight = var_1150_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1158_axes_0 = const()[name = string("op_1158_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381961664)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381964288)))]; + tensor var_1158_cast_fp16 = layer_norm(axes = var_1158_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1084_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1158_cast_fp16")]; + tensor var_1167_to_fp16 = const()[name = string("op_1167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(381966912)))]; + tensor var_1168_to_fp16 = const()[name = string("op_1168_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(395074176)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1168_to_fp16, weight = var_1167_to_fp16, x = var_1158_cast_fp16)[name = string("linear_58_cast_fp16")]; + string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")]; + tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(395084480)))]; + tensor var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408191744)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")]; + int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)]; + tensor var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408194368)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408196992)))]; + fp16 var_1190_to_fp16 = const()[name = string("op_1190_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1200_cast_fp16 = layer_norm(axes = var_1200_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1200_cast_fp16")]; + tensor var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(408199616)))]; + tensor var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411476480)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1212_to_fp16, weight = var_1211_to_fp16, x = var_1200_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1215_to_fp16 = const()[name = string("op_1215_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(411479104)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1215_to_fp16, x = var_1200_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor var_1219_to_fp16 = const()[name = string("op_1219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(414755968)))]; + tensor var_1220_to_fp16 = const()[name = string("op_1220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(418032832)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1220_to_fp16, weight = var_1219_to_fp16, x = var_1200_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor var_1228 = const()[name = string("op_1228"), val = tensor([1, 1500, 20, -1])]; + tensor var_1229_cast_fp16 = reshape(shape = var_1228, x = linear_60_cast_fp16)[name = string("op_1229_cast_fp16")]; + tensor const_244_to_fp16 = const()[name = string("const_244_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1229_cast_fp16, y = const_244_to_fp16)[name = string("q_43_cast_fp16")]; + tensor var_1235 = const()[name = string("op_1235"), val = tensor([1, 1500, 20, -1])]; + tensor var_1236_cast_fp16 = reshape(shape = var_1235, x = linear_61_cast_fp16)[name = string("op_1236_cast_fp16")]; + tensor const_245_to_fp16 = const()[name = string("const_245_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_43_cast_fp16 = mul(x = var_1236_cast_fp16, y = const_245_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_1242 = const()[name = string("op_1242"), val = tensor([1, 1500, 20, -1])]; + tensor var_1243_cast_fp16 = reshape(shape = var_1242, x = linear_62_cast_fp16)[name = string("op_1243_cast_fp16")]; + tensor var_1244 = const()[name = string("op_1244"), val = tensor([0, 2, -3, -1])]; + bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)]; + bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = k_43_cast_fp16)[name = string("transpose_118")]; + tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = q_43_cast_fp16)[name = string("transpose_119")]; + tensor qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_21_cast_fp16")]; + tensor var_1248_cast_fp16 = softmax(axis = var_1184, x = qk_21_cast_fp16)[name = string("op_1248_cast_fp16")]; + bool var_1250_transpose_x_0 = const()[name = string("op_1250_transpose_x_0"), val = bool(false)]; + bool var_1250_transpose_y_0 = const()[name = string("op_1250_transpose_y_0"), val = bool(false)]; + tensor v_43_cast_fp16 = transpose(perm = var_1244, x = var_1243_cast_fp16)[name = string("transpose_117")]; + tensor var_1250_cast_fp16 = matmul(transpose_x = var_1250_transpose_x_0, transpose_y = var_1250_transpose_y_0, x = var_1248_cast_fp16, y = v_43_cast_fp16)[name = string("op_1250_cast_fp16")]; + tensor var_1251 = const()[name = string("op_1251"), val = tensor([0, 2, 1, 3])]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([1, 1500, 1280])]; + tensor var_1252_cast_fp16 = transpose(perm = var_1251, x = var_1250_cast_fp16)[name = string("transpose_116")]; + tensor x_131_cast_fp16 = reshape(shape = concat_10, x = var_1252_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(418035456)))]; + tensor var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421312320)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1257_to_fp16, weight = var_1256_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1264_axes_0 = const()[name = string("op_1264_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421314944)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421317568)))]; + tensor var_1264_cast_fp16 = layer_norm(axes = var_1264_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1190_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1264_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(421320192)))]; + tensor var_1274_to_fp16 = const()[name = string("op_1274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434427456)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1274_to_fp16, weight = var_1273_to_fp16, x = var_1264_cast_fp16)[name = string("linear_64_cast_fp16")]; + string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")]; + tensor x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_1279_to_fp16 = const()[name = string("op_1279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(434437760)))]; + tensor var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447545024)))]; + tensor linear_65_cast_fp16 = linear(bias = var_1280_to_fp16, weight = var_1279_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")]; + int32 var_1290 = const()[name = string("op_1290"), val = int32(-1)]; + tensor var_1306_axes_0 = const()[name = string("op_1306_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447547648)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447550272)))]; + fp16 var_1296_to_fp16 = const()[name = string("op_1296_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1306_cast_fp16 = layer_norm(axes = var_1306_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1306_cast_fp16")]; + tensor var_1317_to_fp16 = const()[name = string("op_1317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(447552896)))]; + tensor var_1318_to_fp16 = const()[name = string("op_1318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450829760)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1318_to_fp16, weight = var_1317_to_fp16, x = var_1306_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1321_to_fp16 = const()[name = string("op_1321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(450832384)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1321_to_fp16, x = var_1306_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1325_to_fp16 = const()[name = string("op_1325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(454109248)))]; + tensor var_1326_to_fp16 = const()[name = string("op_1326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457386112)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1326_to_fp16, weight = var_1325_to_fp16, x = var_1306_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor var_1334 = const()[name = string("op_1334"), val = tensor([1, 1500, 20, -1])]; + tensor var_1335_cast_fp16 = reshape(shape = var_1334, x = linear_66_cast_fp16)[name = string("op_1335_cast_fp16")]; + tensor const_246_to_fp16 = const()[name = string("const_246_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1335_cast_fp16, y = const_246_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1341 = const()[name = string("op_1341"), val = tensor([1, 1500, 20, -1])]; + tensor var_1342_cast_fp16 = reshape(shape = var_1341, x = linear_67_cast_fp16)[name = string("op_1342_cast_fp16")]; + tensor const_247_to_fp16 = const()[name = string("const_247_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_47_cast_fp16 = mul(x = var_1342_cast_fp16, y = const_247_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_1348 = const()[name = string("op_1348"), val = tensor([1, 1500, 20, -1])]; + tensor var_1349_cast_fp16 = reshape(shape = var_1348, x = linear_68_cast_fp16)[name = string("op_1349_cast_fp16")]; + tensor var_1350 = const()[name = string("op_1350"), val = tensor([0, 2, -3, -1])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = k_47_cast_fp16)[name = string("transpose_114")]; + tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = q_47_cast_fp16)[name = string("transpose_115")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_23_cast_fp16")]; + tensor var_1354_cast_fp16 = softmax(axis = var_1290, x = qk_23_cast_fp16)[name = string("op_1354_cast_fp16")]; + bool var_1356_transpose_x_0 = const()[name = string("op_1356_transpose_x_0"), val = bool(false)]; + bool var_1356_transpose_y_0 = const()[name = string("op_1356_transpose_y_0"), val = bool(false)]; + tensor v_47_cast_fp16 = transpose(perm = var_1350, x = var_1349_cast_fp16)[name = string("transpose_113")]; + tensor var_1356_cast_fp16 = matmul(transpose_x = var_1356_transpose_x_0, transpose_y = var_1356_transpose_y_0, x = var_1354_cast_fp16, y = v_47_cast_fp16)[name = string("op_1356_cast_fp16")]; + tensor var_1357 = const()[name = string("op_1357"), val = tensor([0, 2, 1, 3])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 1500, 1280])]; + tensor var_1358_cast_fp16 = transpose(perm = var_1357, x = var_1356_cast_fp16)[name = string("transpose_112")]; + tensor x_143_cast_fp16 = reshape(shape = concat_11, x = var_1358_cast_fp16)[name = string("x_143_cast_fp16")]; + tensor var_1362_to_fp16 = const()[name = string("op_1362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(457388736)))]; + tensor var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460665600)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1363_to_fp16, weight = var_1362_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1370_axes_0 = const()[name = string("op_1370_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460668224)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460670848)))]; + tensor var_1370_cast_fp16 = layer_norm(axes = var_1370_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1296_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1370_cast_fp16")]; + tensor var_1379_to_fp16 = const()[name = string("op_1379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(460673472)))]; + tensor var_1380_to_fp16 = const()[name = string("op_1380_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473780736)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1380_to_fp16, weight = var_1379_to_fp16, x = var_1370_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")]; + tensor x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(473791040)))]; + tensor var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486898304)))]; + tensor linear_71_cast_fp16 = linear(bias = var_1386_to_fp16, weight = var_1385_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")]; + int32 var_1396 = const()[name = string("op_1396"), val = int32(-1)]; + tensor var_1412_axes_0 = const()[name = string("op_1412_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486900928)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486903552)))]; + fp16 var_1402_to_fp16 = const()[name = string("op_1402_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1412_cast_fp16 = layer_norm(axes = var_1412_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1412_cast_fp16")]; + tensor var_1423_to_fp16 = const()[name = string("op_1423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(486906176)))]; + tensor var_1424_to_fp16 = const()[name = string("op_1424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(490183040)))]; + tensor linear_72_cast_fp16 = linear(bias = var_1424_to_fp16, weight = var_1423_to_fp16, x = var_1412_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_1427_to_fp16 = const()[name = string("op_1427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(490185664)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1427_to_fp16, x = var_1412_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_1431_to_fp16 = const()[name = string("op_1431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(493462528)))]; + tensor var_1432_to_fp16 = const()[name = string("op_1432_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496739392)))]; + tensor linear_74_cast_fp16 = linear(bias = var_1432_to_fp16, weight = var_1431_to_fp16, x = var_1412_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_1440 = const()[name = string("op_1440"), val = tensor([1, 1500, 20, -1])]; + tensor var_1441_cast_fp16 = reshape(shape = var_1440, x = linear_72_cast_fp16)[name = string("op_1441_cast_fp16")]; + tensor const_248_to_fp16 = const()[name = string("const_248_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1441_cast_fp16, y = const_248_to_fp16)[name = string("q_51_cast_fp16")]; + tensor var_1447 = const()[name = string("op_1447"), val = tensor([1, 1500, 20, -1])]; + tensor var_1448_cast_fp16 = reshape(shape = var_1447, x = linear_73_cast_fp16)[name = string("op_1448_cast_fp16")]; + tensor const_249_to_fp16 = const()[name = string("const_249_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_51_cast_fp16 = mul(x = var_1448_cast_fp16, y = const_249_to_fp16)[name = string("k_51_cast_fp16")]; + tensor var_1454 = const()[name = string("op_1454"), val = tensor([1, 1500, 20, -1])]; + tensor var_1455_cast_fp16 = reshape(shape = var_1454, x = linear_74_cast_fp16)[name = string("op_1455_cast_fp16")]; + tensor var_1456 = const()[name = string("op_1456"), val = tensor([0, 2, -3, -1])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = k_51_cast_fp16)[name = string("transpose_110")]; + tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = q_51_cast_fp16)[name = string("transpose_111")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_25_cast_fp16")]; + tensor var_1460_cast_fp16 = softmax(axis = var_1396, x = qk_25_cast_fp16)[name = string("op_1460_cast_fp16")]; + bool var_1462_transpose_x_0 = const()[name = string("op_1462_transpose_x_0"), val = bool(false)]; + bool var_1462_transpose_y_0 = const()[name = string("op_1462_transpose_y_0"), val = bool(false)]; + tensor v_51_cast_fp16 = transpose(perm = var_1456, x = var_1455_cast_fp16)[name = string("transpose_109")]; + tensor var_1462_cast_fp16 = matmul(transpose_x = var_1462_transpose_x_0, transpose_y = var_1462_transpose_y_0, x = var_1460_cast_fp16, y = v_51_cast_fp16)[name = string("op_1462_cast_fp16")]; + tensor var_1463 = const()[name = string("op_1463"), val = tensor([0, 2, 1, 3])]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([1, 1500, 1280])]; + tensor var_1464_cast_fp16 = transpose(perm = var_1463, x = var_1462_cast_fp16)[name = string("transpose_108")]; + tensor x_155_cast_fp16 = reshape(shape = concat_12, x = var_1464_cast_fp16)[name = string("x_155_cast_fp16")]; + tensor var_1468_to_fp16 = const()[name = string("op_1468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(496742016)))]; + tensor var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500018880)))]; + tensor linear_75_cast_fp16 = linear(bias = var_1469_to_fp16, weight = var_1468_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1476_axes_0 = const()[name = string("op_1476_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500021504)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500024128)))]; + tensor var_1476_cast_fp16 = layer_norm(axes = var_1476_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1402_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1476_cast_fp16")]; + tensor var_1485_to_fp16 = const()[name = string("op_1485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(500026752)))]; + tensor var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(513134016)))]; + tensor linear_76_cast_fp16 = linear(bias = var_1486_to_fp16, weight = var_1485_to_fp16, x = var_1476_cast_fp16)[name = string("linear_76_cast_fp16")]; + string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")]; + tensor x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")]; + tensor var_1491_to_fp16 = const()[name = string("op_1491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(513144320)))]; + tensor var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526251584)))]; + tensor linear_77_cast_fp16 = linear(bias = var_1492_to_fp16, weight = var_1491_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")]; + int32 var_1502 = const()[name = string("op_1502"), val = int32(-1)]; + tensor var_1518_axes_0 = const()[name = string("op_1518_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526254208)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526256832)))]; + fp16 var_1508_to_fp16 = const()[name = string("op_1508_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1518_cast_fp16 = layer_norm(axes = var_1518_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1518_cast_fp16")]; + tensor var_1529_to_fp16 = const()[name = string("op_1529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(526259456)))]; + tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529536320)))]; + tensor linear_78_cast_fp16 = linear(bias = var_1530_to_fp16, weight = var_1529_to_fp16, x = var_1518_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor var_1533_to_fp16 = const()[name = string("op_1533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(529538944)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1533_to_fp16, x = var_1518_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor var_1537_to_fp16 = const()[name = string("op_1537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(532815808)))]; + tensor var_1538_to_fp16 = const()[name = string("op_1538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(536092672)))]; + tensor linear_80_cast_fp16 = linear(bias = var_1538_to_fp16, weight = var_1537_to_fp16, x = var_1518_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([1, 1500, 20, -1])]; + tensor var_1547_cast_fp16 = reshape(shape = var_1546, x = linear_78_cast_fp16)[name = string("op_1547_cast_fp16")]; + tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1547_cast_fp16, y = const_250_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1553 = const()[name = string("op_1553"), val = tensor([1, 1500, 20, -1])]; + tensor var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_79_cast_fp16)[name = string("op_1554_cast_fp16")]; + tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1554_cast_fp16, y = const_251_to_fp16)[name = string("k_55_cast_fp16")]; + tensor var_1560 = const()[name = string("op_1560"), val = tensor([1, 1500, 20, -1])]; + tensor var_1561_cast_fp16 = reshape(shape = var_1560, x = linear_80_cast_fp16)[name = string("op_1561_cast_fp16")]; + tensor var_1562 = const()[name = string("op_1562"), val = tensor([0, 2, -3, -1])]; + bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)]; + bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = k_55_cast_fp16)[name = string("transpose_106")]; + tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = q_55_cast_fp16)[name = string("transpose_107")]; + tensor qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_27_cast_fp16")]; + tensor var_1566_cast_fp16 = softmax(axis = var_1502, x = qk_27_cast_fp16)[name = string("op_1566_cast_fp16")]; + bool var_1568_transpose_x_0 = const()[name = string("op_1568_transpose_x_0"), val = bool(false)]; + bool var_1568_transpose_y_0 = const()[name = string("op_1568_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1562, x = var_1561_cast_fp16)[name = string("transpose_105")]; + tensor var_1568_cast_fp16 = matmul(transpose_x = var_1568_transpose_x_0, transpose_y = var_1568_transpose_y_0, x = var_1566_cast_fp16, y = v_55_cast_fp16)[name = string("op_1568_cast_fp16")]; + tensor var_1569 = const()[name = string("op_1569"), val = tensor([0, 2, 1, 3])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([1, 1500, 1280])]; + tensor var_1570_cast_fp16 = transpose(perm = var_1569, x = var_1568_cast_fp16)[name = string("transpose_104")]; + tensor x_167_cast_fp16 = reshape(shape = concat_13, x = var_1570_cast_fp16)[name = string("x_167_cast_fp16")]; + tensor var_1574_to_fp16 = const()[name = string("op_1574_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(536095296)))]; + tensor var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539372160)))]; + tensor linear_81_cast_fp16 = linear(bias = var_1575_to_fp16, weight = var_1574_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_1582_axes_0 = const()[name = string("op_1582_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539374784)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539377408)))]; + tensor var_1582_cast_fp16 = layer_norm(axes = var_1582_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1508_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1582_cast_fp16")]; + tensor var_1591_to_fp16 = const()[name = string("op_1591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(539380032)))]; + tensor var_1592_to_fp16 = const()[name = string("op_1592_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552487296)))]; + tensor linear_82_cast_fp16 = linear(bias = var_1592_to_fp16, weight = var_1591_to_fp16, x = var_1582_cast_fp16)[name = string("linear_82_cast_fp16")]; + string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")]; + tensor x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")]; + tensor var_1597_to_fp16 = const()[name = string("op_1597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(552497600)))]; + tensor var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565604864)))]; + tensor linear_83_cast_fp16 = linear(bias = var_1598_to_fp16, weight = var_1597_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")]; + int32 var_1608 = const()[name = string("op_1608"), val = int32(-1)]; + tensor var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565607488)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565610112)))]; + fp16 var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1624_cast_fp16 = layer_norm(axes = var_1624_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1624_cast_fp16")]; + tensor var_1635_to_fp16 = const()[name = string("op_1635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(565612736)))]; + tensor var_1636_to_fp16 = const()[name = string("op_1636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568889600)))]; + tensor linear_84_cast_fp16 = linear(bias = var_1636_to_fp16, weight = var_1635_to_fp16, x = var_1624_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor var_1639_to_fp16 = const()[name = string("op_1639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(568892224)))]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1639_to_fp16, x = var_1624_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(572169088)))]; + tensor var_1644_to_fp16 = const()[name = string("op_1644_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575445952)))]; + tensor linear_86_cast_fp16 = linear(bias = var_1644_to_fp16, weight = var_1643_to_fp16, x = var_1624_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor var_1652 = const()[name = string("op_1652"), val = tensor([1, 1500, 20, -1])]; + tensor var_1653_cast_fp16 = reshape(shape = var_1652, x = linear_84_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor const_252_to_fp16 = const()[name = string("const_252_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_252_to_fp16)[name = string("q_59_cast_fp16")]; + tensor var_1659 = const()[name = string("op_1659"), val = tensor([1, 1500, 20, -1])]; + tensor var_1660_cast_fp16 = reshape(shape = var_1659, x = linear_85_cast_fp16)[name = string("op_1660_cast_fp16")]; + tensor const_253_to_fp16 = const()[name = string("const_253_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1660_cast_fp16, y = const_253_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1666 = const()[name = string("op_1666"), val = tensor([1, 1500, 20, -1])]; + tensor var_1667_cast_fp16 = reshape(shape = var_1666, x = linear_86_cast_fp16)[name = string("op_1667_cast_fp16")]; + tensor var_1668 = const()[name = string("op_1668"), val = tensor([0, 2, -3, -1])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = k_59_cast_fp16)[name = string("transpose_102")]; + tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = q_59_cast_fp16)[name = string("transpose_103")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_29_cast_fp16")]; + tensor var_1672_cast_fp16 = softmax(axis = var_1608, x = qk_29_cast_fp16)[name = string("op_1672_cast_fp16")]; + bool var_1674_transpose_x_0 = const()[name = string("op_1674_transpose_x_0"), val = bool(false)]; + bool var_1674_transpose_y_0 = const()[name = string("op_1674_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1668, x = var_1667_cast_fp16)[name = string("transpose_101")]; + tensor var_1674_cast_fp16 = matmul(transpose_x = var_1674_transpose_x_0, transpose_y = var_1674_transpose_y_0, x = var_1672_cast_fp16, y = v_59_cast_fp16)[name = string("op_1674_cast_fp16")]; + tensor var_1675 = const()[name = string("op_1675"), val = tensor([0, 2, 1, 3])]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 1500, 1280])]; + tensor var_1676_cast_fp16 = transpose(perm = var_1675, x = var_1674_cast_fp16)[name = string("transpose_100")]; + tensor x_179_cast_fp16 = reshape(shape = concat_14, x = var_1676_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(575448576)))]; + tensor var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578725440)))]; + tensor linear_87_cast_fp16 = linear(bias = var_1681_to_fp16, weight = var_1680_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1688_axes_0 = const()[name = string("op_1688_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578728064)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578730688)))]; + tensor var_1688_cast_fp16 = layer_norm(axes = var_1688_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1614_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1688_cast_fp16")]; + tensor var_1697_to_fp16 = const()[name = string("op_1697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(578733312)))]; + tensor var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591840576)))]; + tensor linear_88_cast_fp16 = linear(bias = var_1698_to_fp16, weight = var_1697_to_fp16, x = var_1688_cast_fp16)[name = string("linear_88_cast_fp16")]; + string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")]; + tensor x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1703_to_fp16 = const()[name = string("op_1703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(591850880)))]; + tensor var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604958144)))]; + tensor linear_89_cast_fp16 = linear(bias = var_1704_to_fp16, weight = var_1703_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")]; + int32 var_1714 = const()[name = string("op_1714"), val = int32(-1)]; + tensor var_1730_axes_0 = const()[name = string("op_1730_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604960768)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604963392)))]; + fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1730_cast_fp16 = layer_norm(axes = var_1730_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1730_cast_fp16")]; + tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(604966016)))]; + tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(608242880)))]; + tensor linear_90_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1730_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_1745_to_fp16 = const()[name = string("op_1745_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(608245504)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1745_to_fp16, x = var_1730_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor var_1749_to_fp16 = const()[name = string("op_1749_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(611522368)))]; + tensor var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614799232)))]; + tensor linear_92_cast_fp16 = linear(bias = var_1750_to_fp16, weight = var_1749_to_fp16, x = var_1730_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor var_1758 = const()[name = string("op_1758"), val = tensor([1, 1500, 20, -1])]; + tensor var_1759_cast_fp16 = reshape(shape = var_1758, x = linear_90_cast_fp16)[name = string("op_1759_cast_fp16")]; + tensor const_254_to_fp16 = const()[name = string("const_254_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1759_cast_fp16, y = const_254_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1765 = const()[name = string("op_1765"), val = tensor([1, 1500, 20, -1])]; + tensor var_1766_cast_fp16 = reshape(shape = var_1765, x = linear_91_cast_fp16)[name = string("op_1766_cast_fp16")]; + tensor const_255_to_fp16 = const()[name = string("const_255_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_63_cast_fp16 = mul(x = var_1766_cast_fp16, y = const_255_to_fp16)[name = string("k_63_cast_fp16")]; + tensor var_1772 = const()[name = string("op_1772"), val = tensor([1, 1500, 20, -1])]; + tensor var_1773_cast_fp16 = reshape(shape = var_1772, x = linear_92_cast_fp16)[name = string("op_1773_cast_fp16")]; + tensor var_1774 = const()[name = string("op_1774"), val = tensor([0, 2, -3, -1])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = k_63_cast_fp16)[name = string("transpose_98")]; + tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = q_63_cast_fp16)[name = string("transpose_99")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_31_cast_fp16")]; + tensor var_1778_cast_fp16 = softmax(axis = var_1714, x = qk_31_cast_fp16)[name = string("op_1778_cast_fp16")]; + bool var_1780_transpose_x_0 = const()[name = string("op_1780_transpose_x_0"), val = bool(false)]; + bool var_1780_transpose_y_0 = const()[name = string("op_1780_transpose_y_0"), val = bool(false)]; + tensor v_63_cast_fp16 = transpose(perm = var_1774, x = var_1773_cast_fp16)[name = string("transpose_97")]; + tensor var_1780_cast_fp16 = matmul(transpose_x = var_1780_transpose_x_0, transpose_y = var_1780_transpose_y_0, x = var_1778_cast_fp16, y = v_63_cast_fp16)[name = string("op_1780_cast_fp16")]; + tensor var_1781 = const()[name = string("op_1781"), val = tensor([0, 2, 1, 3])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([1, 1500, 1280])]; + tensor var_1782_cast_fp16 = transpose(perm = var_1781, x = var_1780_cast_fp16)[name = string("transpose_96")]; + tensor x_191_cast_fp16 = reshape(shape = concat_15, x = var_1782_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor var_1786_to_fp16 = const()[name = string("op_1786_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(614801856)))]; + tensor var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618078720)))]; + tensor linear_93_cast_fp16 = linear(bias = var_1787_to_fp16, weight = var_1786_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_193_cast_fp16_1 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_1794_axes_0 = const()[name = string("op_1794_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618081344)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618083968)))]; + tensor var_1794_cast_fp16 = layer_norm(axes = var_1794_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1720_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16_1)[name = string("op_1794_cast_fp16")]; + tensor var_1803_to_fp16 = const()[name = string("op_1803_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(618086592)))]; + tensor var_1804_to_fp16 = const()[name = string("op_1804_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(631193856)))]; + tensor linear_94_cast_fp16 = linear(bias = var_1804_to_fp16, weight = var_1803_to_fp16, x = var_1794_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")]; + tensor x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_1809_to_fp16 = const()[name = string("op_1809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(631204160)))]; + tensor var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/0-weight.bin"), offset = uint64(644311424)))]; + tensor linear_95_cast_fp16_1 = linear(bias = var_1810_to_fp16, weight = var_1809_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")]; + string linear_95_cast_fp16_dtype_0 = const()[name = string("linear_95_cast_fp16_dtype_0"), val = string("fp32")]; + string x_193_cast_fp16_dtype_0 = const()[name = string("x_193_cast_fp16_dtype_0"), val = string("fp32")]; + tensor x_193_cast_fp16 = cast(dtype = x_193_cast_fp16_dtype_0, x = x_193_cast_fp16_1)[name = string("cast_2")]; + tensor linear_95_cast_fp16 = cast(dtype = linear_95_cast_fp16_dtype_0, x = linear_95_cast_fp16_1)[name = string("cast_3")]; + } -> (linear_95_cast_fp16, x_193_cast_fp16); +} \ No newline at end of file diff --git a/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin b/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..86bc91dc5f3c78b8d21adbe8207e7316052e5112 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model0/weights/0-weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593ca90475cb8284e5c5a84580932a05e0cc3fe872e4804b4b1f6d7fba16c21c +size 644314048 diff --git a/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin b/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed18ae44ab3d09ffbed846536c84109f12b19b1 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model1/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a8281049b2a65a3be541cfd9f949e84b8fe1c5251ce90e46da1626fed54e58a +size 108 diff --git a/large-v3/encoder.mlmodelc/model1/coremldata.bin b/large-v3/encoder.mlmodelc/model1/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ea59338ab416594015715ac6994e32a8c96e239 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model1/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70195139816248a2b1fbef695f96decb60b35af6f364f84a7d2293a3d0a09e11 +size 196 diff --git a/large-v3/encoder.mlmodelc/model1/model.mil b/large-v3/encoder.mlmodelc/model1/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..7d3b026fa91fad416f7820629ab7ce05c46aad69 --- /dev/null +++ b/large-v3/encoder.mlmodelc/model1/model.mil @@ -0,0 +1,945 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})] +{ + func main(tensor linear_95_cast_fp16, tensor x_193_cast_fp16) { + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(64)))]; + string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("fp16")]; + string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("fp16")]; + tensor cast_0 = cast(dtype = cast_0_dtype_0, x = linear_95_cast_fp16)[name = string("cast_0")]; + tensor cast_1 = cast(dtype = cast_1_dtype_0, x = x_193_cast_fp16)[name = string("cast_1")]; + tensor x_199_cast_fp16 = add(x = cast_1, y = cast_0)[name = string("x_199_cast_fp16")]; + int32 var_1820 = const()[name = string("op_1820"), val = int32(-1)]; + tensor var_1836_axes_0 = const()[name = string("op_1836_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(2688)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(5312)))]; + fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1836_cast_fp16 = layer_norm(axes = var_1836_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1836_cast_fp16")]; + tensor var_1847_to_fp16 = const()[name = string("op_1847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(7936)))]; + tensor var_1848_to_fp16 = const()[name = string("op_1848_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3284800)))]; + tensor linear_96_cast_fp16 = linear(bias = var_1848_to_fp16, weight = var_1847_to_fp16, x = var_1836_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(3287424)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1851_to_fp16, x = var_1836_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_1855_to_fp16 = const()[name = string("op_1855_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(6564288)))]; + tensor var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9841152)))]; + tensor linear_98_cast_fp16 = linear(bias = var_1856_to_fp16, weight = var_1855_to_fp16, x = var_1836_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_1864 = const()[name = string("op_1864"), val = tensor([1, 1500, 20, -1])]; + tensor var_1865_cast_fp16 = reshape(shape = var_1864, x = linear_96_cast_fp16)[name = string("op_1865_cast_fp16")]; + tensor const_256_to_fp16 = const()[name = string("const_256_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1865_cast_fp16, y = const_256_to_fp16)[name = string("q_67_cast_fp16")]; + tensor var_1871 = const()[name = string("op_1871"), val = tensor([1, 1500, 20, -1])]; + tensor var_1872_cast_fp16 = reshape(shape = var_1871, x = linear_97_cast_fp16)[name = string("op_1872_cast_fp16")]; + tensor const_257_to_fp16 = const()[name = string("const_257_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_67_cast_fp16 = mul(x = var_1872_cast_fp16, y = const_257_to_fp16)[name = string("k_67_cast_fp16")]; + tensor var_1878 = const()[name = string("op_1878"), val = tensor([1, 1500, 20, -1])]; + tensor var_1879_cast_fp16 = reshape(shape = var_1878, x = linear_98_cast_fp16)[name = string("op_1879_cast_fp16")]; + tensor var_1880 = const()[name = string("op_1880"), val = tensor([0, 2, -3, -1])]; + bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)]; + bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = k_67_cast_fp16)[name = string("transpose_158")]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = q_67_cast_fp16)[name = string("transpose_159")]; + tensor qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_33_cast_fp16")]; + tensor var_1884_cast_fp16 = softmax(axis = var_1820, x = qk_33_cast_fp16)[name = string("op_1884_cast_fp16")]; + bool var_1886_transpose_x_0 = const()[name = string("op_1886_transpose_x_0"), val = bool(false)]; + bool var_1886_transpose_y_0 = const()[name = string("op_1886_transpose_y_0"), val = bool(false)]; + tensor v_67_cast_fp16 = transpose(perm = var_1880, x = var_1879_cast_fp16)[name = string("transpose_157")]; + tensor var_1886_cast_fp16 = matmul(transpose_x = var_1886_transpose_x_0, transpose_y = var_1886_transpose_y_0, x = var_1884_cast_fp16, y = v_67_cast_fp16)[name = string("op_1886_cast_fp16")]; + tensor var_1887 = const()[name = string("op_1887"), val = tensor([0, 2, 1, 3])]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([1, 1500, 1280])]; + tensor var_1888_cast_fp16 = transpose(perm = var_1887, x = var_1886_cast_fp16)[name = string("transpose_156")]; + tensor x_203_cast_fp16 = reshape(shape = concat_16, x = var_1888_cast_fp16)[name = string("x_203_cast_fp16")]; + tensor var_1892_to_fp16 = const()[name = string("op_1892_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(9843776)))]; + tensor var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13120640)))]; + tensor linear_99_cast_fp16 = linear(bias = var_1893_to_fp16, weight = var_1892_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_1900_axes_0 = const()[name = string("op_1900_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13123264)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13125888)))]; + tensor var_1900_cast_fp16 = layer_norm(axes = var_1900_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1826_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1900_cast_fp16")]; + tensor var_1909_to_fp16 = const()[name = string("op_1909_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(13128512)))]; + tensor var_1910_to_fp16 = const()[name = string("op_1910_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26235776)))]; + tensor linear_100_cast_fp16 = linear(bias = var_1910_to_fp16, weight = var_1909_to_fp16, x = var_1900_cast_fp16)[name = string("linear_100_cast_fp16")]; + string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")]; + tensor x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(26246080)))]; + tensor var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39353344)))]; + tensor linear_101_cast_fp16 = linear(bias = var_1916_to_fp16, weight = var_1915_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")]; + int32 var_1926 = const()[name = string("op_1926"), val = int32(-1)]; + tensor var_1942_axes_0 = const()[name = string("op_1942_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39355968)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39358592)))]; + fp16 var_1932_to_fp16 = const()[name = string("op_1932_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1942_cast_fp16 = layer_norm(axes = var_1942_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1942_cast_fp16")]; + tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(39361216)))]; + tensor var_1954_to_fp16 = const()[name = string("op_1954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42638080)))]; + tensor linear_102_cast_fp16 = linear(bias = var_1954_to_fp16, weight = var_1953_to_fp16, x = var_1942_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_1957_to_fp16 = const()[name = string("op_1957_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(42640704)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1957_to_fp16, x = var_1942_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor var_1961_to_fp16 = const()[name = string("op_1961_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(45917568)))]; + tensor var_1962_to_fp16 = const()[name = string("op_1962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49194432)))]; + tensor linear_104_cast_fp16 = linear(bias = var_1962_to_fp16, weight = var_1961_to_fp16, x = var_1942_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_1970 = const()[name = string("op_1970"), val = tensor([1, 1500, 20, -1])]; + tensor var_1971_cast_fp16 = reshape(shape = var_1970, x = linear_102_cast_fp16)[name = string("op_1971_cast_fp16")]; + tensor const_258_to_fp16 = const()[name = string("const_258_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_1971_cast_fp16, y = const_258_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_1977 = const()[name = string("op_1977"), val = tensor([1, 1500, 20, -1])]; + tensor var_1978_cast_fp16 = reshape(shape = var_1977, x = linear_103_cast_fp16)[name = string("op_1978_cast_fp16")]; + tensor const_259_to_fp16 = const()[name = string("const_259_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_71_cast_fp16 = mul(x = var_1978_cast_fp16, y = const_259_to_fp16)[name = string("k_71_cast_fp16")]; + tensor var_1984 = const()[name = string("op_1984"), val = tensor([1, 1500, 20, -1])]; + tensor var_1985_cast_fp16 = reshape(shape = var_1984, x = linear_104_cast_fp16)[name = string("op_1985_cast_fp16")]; + tensor var_1986 = const()[name = string("op_1986"), val = tensor([0, 2, -3, -1])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = k_71_cast_fp16)[name = string("transpose_154")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = q_71_cast_fp16)[name = string("transpose_155")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_35_cast_fp16")]; + tensor var_1990_cast_fp16 = softmax(axis = var_1926, x = qk_35_cast_fp16)[name = string("op_1990_cast_fp16")]; + bool var_1992_transpose_x_0 = const()[name = string("op_1992_transpose_x_0"), val = bool(false)]; + bool var_1992_transpose_y_0 = const()[name = string("op_1992_transpose_y_0"), val = bool(false)]; + tensor v_71_cast_fp16 = transpose(perm = var_1986, x = var_1985_cast_fp16)[name = string("transpose_153")]; + tensor var_1992_cast_fp16 = matmul(transpose_x = var_1992_transpose_x_0, transpose_y = var_1992_transpose_y_0, x = var_1990_cast_fp16, y = v_71_cast_fp16)[name = string("op_1992_cast_fp16")]; + tensor var_1993 = const()[name = string("op_1993"), val = tensor([0, 2, 1, 3])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 1500, 1280])]; + tensor var_1994_cast_fp16 = transpose(perm = var_1993, x = var_1992_cast_fp16)[name = string("transpose_152")]; + tensor x_215_cast_fp16 = reshape(shape = concat_17, x = var_1994_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(49197056)))]; + tensor var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52473920)))]; + tensor linear_105_cast_fp16 = linear(bias = var_1999_to_fp16, weight = var_1998_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2006_axes_0 = const()[name = string("op_2006_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52476544)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52479168)))]; + tensor var_2006_cast_fp16 = layer_norm(axes = var_2006_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1932_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_2006_cast_fp16")]; + tensor var_2015_to_fp16 = const()[name = string("op_2015_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(52481792)))]; + tensor var_2016_to_fp16 = const()[name = string("op_2016_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65589056)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2016_to_fp16, weight = var_2015_to_fp16, x = var_2006_cast_fp16)[name = string("linear_106_cast_fp16")]; + string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")]; + tensor x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")]; + tensor var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(65599360)))]; + tensor var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78706624)))]; + tensor linear_107_cast_fp16 = linear(bias = var_2022_to_fp16, weight = var_2021_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")]; + int32 var_2032 = const()[name = string("op_2032"), val = int32(-1)]; + tensor var_2048_axes_0 = const()[name = string("op_2048_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78709248)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78711872)))]; + fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2048_cast_fp16 = layer_norm(axes = var_2048_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2048_cast_fp16")]; + tensor var_2059_to_fp16 = const()[name = string("op_2059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(78714496)))]; + tensor var_2060_to_fp16 = const()[name = string("op_2060_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81991360)))]; + tensor linear_108_cast_fp16 = linear(bias = var_2060_to_fp16, weight = var_2059_to_fp16, x = var_2048_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(81993984)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2063_to_fp16, x = var_2048_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(85270848)))]; + tensor var_2068_to_fp16 = const()[name = string("op_2068_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88547712)))]; + tensor linear_110_cast_fp16 = linear(bias = var_2068_to_fp16, weight = var_2067_to_fp16, x = var_2048_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor var_2076 = const()[name = string("op_2076"), val = tensor([1, 1500, 20, -1])]; + tensor var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_108_cast_fp16)[name = string("op_2077_cast_fp16")]; + tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2077_cast_fp16, y = const_260_to_fp16)[name = string("q_75_cast_fp16")]; + tensor var_2083 = const()[name = string("op_2083"), val = tensor([1, 1500, 20, -1])]; + tensor var_2084_cast_fp16 = reshape(shape = var_2083, x = linear_109_cast_fp16)[name = string("op_2084_cast_fp16")]; + tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_2084_cast_fp16, y = const_261_to_fp16)[name = string("k_75_cast_fp16")]; + tensor var_2090 = const()[name = string("op_2090"), val = tensor([1, 1500, 20, -1])]; + tensor var_2091_cast_fp16 = reshape(shape = var_2090, x = linear_110_cast_fp16)[name = string("op_2091_cast_fp16")]; + tensor var_2092 = const()[name = string("op_2092"), val = tensor([0, 2, -3, -1])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = k_75_cast_fp16)[name = string("transpose_150")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = q_75_cast_fp16)[name = string("transpose_151")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_37_cast_fp16")]; + tensor var_2096_cast_fp16 = softmax(axis = var_2032, x = qk_37_cast_fp16)[name = string("op_2096_cast_fp16")]; + bool var_2098_transpose_x_0 = const()[name = string("op_2098_transpose_x_0"), val = bool(false)]; + bool var_2098_transpose_y_0 = const()[name = string("op_2098_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_2092, x = var_2091_cast_fp16)[name = string("transpose_149")]; + tensor var_2098_cast_fp16 = matmul(transpose_x = var_2098_transpose_x_0, transpose_y = var_2098_transpose_y_0, x = var_2096_cast_fp16, y = v_75_cast_fp16)[name = string("op_2098_cast_fp16")]; + tensor var_2099 = const()[name = string("op_2099"), val = tensor([0, 2, 1, 3])]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([1, 1500, 1280])]; + tensor var_2100_cast_fp16 = transpose(perm = var_2099, x = var_2098_cast_fp16)[name = string("transpose_148")]; + tensor x_227_cast_fp16 = reshape(shape = concat_18, x = var_2100_cast_fp16)[name = string("x_227_cast_fp16")]; + tensor var_2104_to_fp16 = const()[name = string("op_2104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(88550336)))]; + tensor var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91827200)))]; + tensor linear_111_cast_fp16 = linear(bias = var_2105_to_fp16, weight = var_2104_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2112_axes_0 = const()[name = string("op_2112_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91829824)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91832448)))]; + tensor var_2112_cast_fp16 = layer_norm(axes = var_2112_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2038_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2112_cast_fp16")]; + tensor var_2121_to_fp16 = const()[name = string("op_2121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(91835072)))]; + tensor var_2122_to_fp16 = const()[name = string("op_2122_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104942336)))]; + tensor linear_112_cast_fp16 = linear(bias = var_2122_to_fp16, weight = var_2121_to_fp16, x = var_2112_cast_fp16)[name = string("linear_112_cast_fp16")]; + string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")]; + tensor x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")]; + tensor var_2127_to_fp16 = const()[name = string("op_2127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(104952640)))]; + tensor var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118059904)))]; + tensor linear_113_cast_fp16 = linear(bias = var_2128_to_fp16, weight = var_2127_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")]; + int32 var_2138 = const()[name = string("op_2138"), val = int32(-1)]; + tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118062528)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118065152)))]; + fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2154_cast_fp16")]; + tensor var_2165_to_fp16 = const()[name = string("op_2165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(118067776)))]; + tensor var_2166_to_fp16 = const()[name = string("op_2166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121344640)))]; + tensor linear_114_cast_fp16 = linear(bias = var_2166_to_fp16, weight = var_2165_to_fp16, x = var_2154_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_2169_to_fp16 = const()[name = string("op_2169_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(121347264)))]; + tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2169_to_fp16, x = var_2154_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor var_2173_to_fp16 = const()[name = string("op_2173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(124624128)))]; + tensor var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127900992)))]; + tensor linear_116_cast_fp16 = linear(bias = var_2174_to_fp16, weight = var_2173_to_fp16, x = var_2154_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_2182 = const()[name = string("op_2182"), val = tensor([1, 1500, 20, -1])]; + tensor var_2183_cast_fp16 = reshape(shape = var_2182, x = linear_114_cast_fp16)[name = string("op_2183_cast_fp16")]; + tensor const_262_to_fp16 = const()[name = string("const_262_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2183_cast_fp16, y = const_262_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2189 = const()[name = string("op_2189"), val = tensor([1, 1500, 20, -1])]; + tensor var_2190_cast_fp16 = reshape(shape = var_2189, x = linear_115_cast_fp16)[name = string("op_2190_cast_fp16")]; + tensor const_263_to_fp16 = const()[name = string("const_263_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_2190_cast_fp16, y = const_263_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_2196 = const()[name = string("op_2196"), val = tensor([1, 1500, 20, -1])]; + tensor var_2197_cast_fp16 = reshape(shape = var_2196, x = linear_116_cast_fp16)[name = string("op_2197_cast_fp16")]; + tensor var_2198 = const()[name = string("op_2198"), val = tensor([0, 2, -3, -1])]; + bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)]; + bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = k_79_cast_fp16)[name = string("transpose_146")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = q_79_cast_fp16)[name = string("transpose_147")]; + tensor qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_39_cast_fp16")]; + tensor var_2202_cast_fp16 = softmax(axis = var_2138, x = qk_39_cast_fp16)[name = string("op_2202_cast_fp16")]; + bool var_2204_transpose_x_0 = const()[name = string("op_2204_transpose_x_0"), val = bool(false)]; + bool var_2204_transpose_y_0 = const()[name = string("op_2204_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_2198, x = var_2197_cast_fp16)[name = string("transpose_145")]; + tensor var_2204_cast_fp16 = matmul(transpose_x = var_2204_transpose_x_0, transpose_y = var_2204_transpose_y_0, x = var_2202_cast_fp16, y = v_79_cast_fp16)[name = string("op_2204_cast_fp16")]; + tensor var_2205 = const()[name = string("op_2205"), val = tensor([0, 2, 1, 3])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([1, 1500, 1280])]; + tensor var_2206_cast_fp16 = transpose(perm = var_2205, x = var_2204_cast_fp16)[name = string("transpose_144")]; + tensor x_239_cast_fp16 = reshape(shape = concat_19, x = var_2206_cast_fp16)[name = string("x_239_cast_fp16")]; + tensor var_2210_to_fp16 = const()[name = string("op_2210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(127903616)))]; + tensor var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131180480)))]; + tensor linear_117_cast_fp16 = linear(bias = var_2211_to_fp16, weight = var_2210_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2218_axes_0 = const()[name = string("op_2218_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131183104)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131185728)))]; + tensor var_2218_cast_fp16 = layer_norm(axes = var_2218_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2144_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2218_cast_fp16")]; + tensor var_2227_to_fp16 = const()[name = string("op_2227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(131188352)))]; + tensor var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144295616)))]; + tensor linear_118_cast_fp16 = linear(bias = var_2228_to_fp16, weight = var_2227_to_fp16, x = var_2218_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")]; + tensor x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_2233_to_fp16 = const()[name = string("op_2233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(144305920)))]; + tensor var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157413184)))]; + tensor linear_119_cast_fp16 = linear(bias = var_2234_to_fp16, weight = var_2233_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")]; + int32 var_2244 = const()[name = string("op_2244"), val = int32(-1)]; + tensor var_2260_axes_0 = const()[name = string("op_2260_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157415808)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157418432)))]; + fp16 var_2250_to_fp16 = const()[name = string("op_2250_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2260_cast_fp16 = layer_norm(axes = var_2260_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2260_cast_fp16")]; + tensor var_2271_to_fp16 = const()[name = string("op_2271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(157421056)))]; + tensor var_2272_to_fp16 = const()[name = string("op_2272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160697920)))]; + tensor linear_120_cast_fp16 = linear(bias = var_2272_to_fp16, weight = var_2271_to_fp16, x = var_2260_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_2275_to_fp16 = const()[name = string("op_2275_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(160700544)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2275_to_fp16, x = var_2260_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_2279_to_fp16 = const()[name = string("op_2279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(163977408)))]; + tensor var_2280_to_fp16 = const()[name = string("op_2280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167254272)))]; + tensor linear_122_cast_fp16 = linear(bias = var_2280_to_fp16, weight = var_2279_to_fp16, x = var_2260_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_2288 = const()[name = string("op_2288"), val = tensor([1, 1500, 20, -1])]; + tensor var_2289_cast_fp16 = reshape(shape = var_2288, x = linear_120_cast_fp16)[name = string("op_2289_cast_fp16")]; + tensor const_264_to_fp16 = const()[name = string("const_264_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2289_cast_fp16, y = const_264_to_fp16)[name = string("q_83_cast_fp16")]; + tensor var_2295 = const()[name = string("op_2295"), val = tensor([1, 1500, 20, -1])]; + tensor var_2296_cast_fp16 = reshape(shape = var_2295, x = linear_121_cast_fp16)[name = string("op_2296_cast_fp16")]; + tensor const_265_to_fp16 = const()[name = string("const_265_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_83_cast_fp16 = mul(x = var_2296_cast_fp16, y = const_265_to_fp16)[name = string("k_83_cast_fp16")]; + tensor var_2302 = const()[name = string("op_2302"), val = tensor([1, 1500, 20, -1])]; + tensor var_2303_cast_fp16 = reshape(shape = var_2302, x = linear_122_cast_fp16)[name = string("op_2303_cast_fp16")]; + tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, -3, -1])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = k_83_cast_fp16)[name = string("transpose_142")]; + tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = q_83_cast_fp16)[name = string("transpose_143")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_72, y = transpose_73)[name = string("qk_41_cast_fp16")]; + tensor var_2308_cast_fp16 = softmax(axis = var_2244, x = qk_41_cast_fp16)[name = string("op_2308_cast_fp16")]; + bool var_2310_transpose_x_0 = const()[name = string("op_2310_transpose_x_0"), val = bool(false)]; + bool var_2310_transpose_y_0 = const()[name = string("op_2310_transpose_y_0"), val = bool(false)]; + tensor v_83_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_141")]; + tensor var_2310_cast_fp16 = matmul(transpose_x = var_2310_transpose_x_0, transpose_y = var_2310_transpose_y_0, x = var_2308_cast_fp16, y = v_83_cast_fp16)[name = string("op_2310_cast_fp16")]; + tensor var_2311 = const()[name = string("op_2311"), val = tensor([0, 2, 1, 3])]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([1, 1500, 1280])]; + tensor var_2312_cast_fp16 = transpose(perm = var_2311, x = var_2310_cast_fp16)[name = string("transpose_140")]; + tensor x_251_cast_fp16 = reshape(shape = concat_20, x = var_2312_cast_fp16)[name = string("x_251_cast_fp16")]; + tensor var_2316_to_fp16 = const()[name = string("op_2316_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(167256896)))]; + tensor var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170533760)))]; + tensor linear_123_cast_fp16 = linear(bias = var_2317_to_fp16, weight = var_2316_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_2324_axes_0 = const()[name = string("op_2324_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170536384)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170539008)))]; + tensor var_2324_cast_fp16 = layer_norm(axes = var_2324_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2250_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2324_cast_fp16")]; + tensor var_2333_to_fp16 = const()[name = string("op_2333_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(170541632)))]; + tensor var_2334_to_fp16 = const()[name = string("op_2334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183648896)))]; + tensor linear_124_cast_fp16 = linear(bias = var_2334_to_fp16, weight = var_2333_to_fp16, x = var_2324_cast_fp16)[name = string("linear_124_cast_fp16")]; + string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")]; + tensor x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")]; + tensor var_2339_to_fp16 = const()[name = string("op_2339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(183659200)))]; + tensor var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196766464)))]; + tensor linear_125_cast_fp16 = linear(bias = var_2340_to_fp16, weight = var_2339_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")]; + int32 var_2350 = const()[name = string("op_2350"), val = int32(-1)]; + tensor var_2366_axes_0 = const()[name = string("op_2366_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196769088)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196771712)))]; + fp16 var_2356_to_fp16 = const()[name = string("op_2356_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2366_cast_fp16 = layer_norm(axes = var_2366_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2366_cast_fp16")]; + tensor var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(196774336)))]; + tensor var_2378_to_fp16 = const()[name = string("op_2378_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200051200)))]; + tensor linear_126_cast_fp16 = linear(bias = var_2378_to_fp16, weight = var_2377_to_fp16, x = var_2366_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(200053824)))]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2381_to_fp16, x = var_2366_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor var_2385_to_fp16 = const()[name = string("op_2385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(203330688)))]; + tensor var_2386_to_fp16 = const()[name = string("op_2386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206607552)))]; + tensor linear_128_cast_fp16 = linear(bias = var_2386_to_fp16, weight = var_2385_to_fp16, x = var_2366_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_2394 = const()[name = string("op_2394"), val = tensor([1, 1500, 20, -1])]; + tensor var_2395_cast_fp16 = reshape(shape = var_2394, x = linear_126_cast_fp16)[name = string("op_2395_cast_fp16")]; + tensor const_266_to_fp16 = const()[name = string("const_266_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_266_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2401 = const()[name = string("op_2401"), val = tensor([1, 1500, 20, -1])]; + tensor var_2402_cast_fp16 = reshape(shape = var_2401, x = linear_127_cast_fp16)[name = string("op_2402_cast_fp16")]; + tensor const_267_to_fp16 = const()[name = string("const_267_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_87_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_267_to_fp16)[name = string("k_87_cast_fp16")]; + tensor var_2408 = const()[name = string("op_2408"), val = tensor([1, 1500, 20, -1])]; + tensor var_2409_cast_fp16 = reshape(shape = var_2408, x = linear_128_cast_fp16)[name = string("op_2409_cast_fp16")]; + tensor var_2410 = const()[name = string("op_2410"), val = tensor([0, 2, -3, -1])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = k_87_cast_fp16)[name = string("transpose_138")]; + tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = q_87_cast_fp16)[name = string("transpose_139")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_74, y = transpose_75)[name = string("qk_43_cast_fp16")]; + tensor var_2414_cast_fp16 = softmax(axis = var_2350, x = qk_43_cast_fp16)[name = string("op_2414_cast_fp16")]; + bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)]; + bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)]; + tensor v_87_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_137")]; + tensor var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_87_cast_fp16)[name = string("op_2416_cast_fp16")]; + tensor var_2417 = const()[name = string("op_2417"), val = tensor([0, 2, 1, 3])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([1, 1500, 1280])]; + tensor var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_136")]; + tensor x_263_cast_fp16 = reshape(shape = concat_21, x = var_2418_cast_fp16)[name = string("x_263_cast_fp16")]; + tensor var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(206610176)))]; + tensor var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209887040)))]; + tensor linear_129_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209889664)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209892288)))]; + tensor var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2356_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2430_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(209894912)))]; + tensor var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223002176)))]; + tensor linear_130_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_130_cast_fp16")]; + string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")]; + tensor x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")]; + tensor var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(223012480)))]; + tensor var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236119744)))]; + tensor linear_131_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")]; + int32 var_2456 = const()[name = string("op_2456"), val = int32(-1)]; + tensor var_2472_axes_0 = const()[name = string("op_2472_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236122368)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236124992)))]; + fp16 var_2462_to_fp16 = const()[name = string("op_2462_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2472_cast_fp16 = layer_norm(axes = var_2472_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2472_cast_fp16")]; + tensor var_2483_to_fp16 = const()[name = string("op_2483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(236127616)))]; + tensor var_2484_to_fp16 = const()[name = string("op_2484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239404480)))]; + tensor linear_132_cast_fp16 = linear(bias = var_2484_to_fp16, weight = var_2483_to_fp16, x = var_2472_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor var_2487_to_fp16 = const()[name = string("op_2487_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(239407104)))]; + tensor linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2487_to_fp16, x = var_2472_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor var_2491_to_fp16 = const()[name = string("op_2491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(242683968)))]; + tensor var_2492_to_fp16 = const()[name = string("op_2492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245960832)))]; + tensor linear_134_cast_fp16 = linear(bias = var_2492_to_fp16, weight = var_2491_to_fp16, x = var_2472_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor var_2500 = const()[name = string("op_2500"), val = tensor([1, 1500, 20, -1])]; + tensor var_2501_cast_fp16 = reshape(shape = var_2500, x = linear_132_cast_fp16)[name = string("op_2501_cast_fp16")]; + tensor const_268_to_fp16 = const()[name = string("const_268_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2501_cast_fp16, y = const_268_to_fp16)[name = string("q_91_cast_fp16")]; + tensor var_2507 = const()[name = string("op_2507"), val = tensor([1, 1500, 20, -1])]; + tensor var_2508_cast_fp16 = reshape(shape = var_2507, x = linear_133_cast_fp16)[name = string("op_2508_cast_fp16")]; + tensor const_269_to_fp16 = const()[name = string("const_269_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_91_cast_fp16 = mul(x = var_2508_cast_fp16, y = const_269_to_fp16)[name = string("k_91_cast_fp16")]; + tensor var_2514 = const()[name = string("op_2514"), val = tensor([1, 1500, 20, -1])]; + tensor var_2515_cast_fp16 = reshape(shape = var_2514, x = linear_134_cast_fp16)[name = string("op_2515_cast_fp16")]; + tensor var_2516 = const()[name = string("op_2516"), val = tensor([0, 2, -3, -1])]; + bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)]; + bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)]; + tensor transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = k_91_cast_fp16)[name = string("transpose_134")]; + tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = q_91_cast_fp16)[name = string("transpose_135")]; + tensor qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_76, y = transpose_77)[name = string("qk_45_cast_fp16")]; + tensor var_2520_cast_fp16 = softmax(axis = var_2456, x = qk_45_cast_fp16)[name = string("op_2520_cast_fp16")]; + bool var_2522_transpose_x_0 = const()[name = string("op_2522_transpose_x_0"), val = bool(false)]; + bool var_2522_transpose_y_0 = const()[name = string("op_2522_transpose_y_0"), val = bool(false)]; + tensor v_91_cast_fp16 = transpose(perm = var_2516, x = var_2515_cast_fp16)[name = string("transpose_133")]; + tensor var_2522_cast_fp16 = matmul(transpose_x = var_2522_transpose_x_0, transpose_y = var_2522_transpose_y_0, x = var_2520_cast_fp16, y = v_91_cast_fp16)[name = string("op_2522_cast_fp16")]; + tensor var_2523 = const()[name = string("op_2523"), val = tensor([0, 2, 1, 3])]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([1, 1500, 1280])]; + tensor var_2524_cast_fp16 = transpose(perm = var_2523, x = var_2522_cast_fp16)[name = string("transpose_132")]; + tensor x_275_cast_fp16 = reshape(shape = concat_22, x = var_2524_cast_fp16)[name = string("x_275_cast_fp16")]; + tensor var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(245963456)))]; + tensor var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249240320)))]; + tensor linear_135_cast_fp16 = linear(bias = var_2529_to_fp16, weight = var_2528_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_2536_axes_0 = const()[name = string("op_2536_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249242944)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249245568)))]; + tensor var_2536_cast_fp16 = layer_norm(axes = var_2536_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2462_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2536_cast_fp16")]; + tensor var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(249248192)))]; + tensor var_2546_to_fp16 = const()[name = string("op_2546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262355456)))]; + tensor linear_136_cast_fp16 = linear(bias = var_2546_to_fp16, weight = var_2545_to_fp16, x = var_2536_cast_fp16)[name = string("linear_136_cast_fp16")]; + string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")]; + tensor x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")]; + tensor var_2551_to_fp16 = const()[name = string("op_2551_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(262365760)))]; + tensor var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275473024)))]; + tensor linear_137_cast_fp16 = linear(bias = var_2552_to_fp16, weight = var_2551_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")]; + int32 var_2562 = const()[name = string("op_2562"), val = int32(-1)]; + tensor var_2578_axes_0 = const()[name = string("op_2578_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275475648)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275478272)))]; + fp16 var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2578_cast_fp16 = layer_norm(axes = var_2578_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2578_cast_fp16")]; + tensor var_2589_to_fp16 = const()[name = string("op_2589_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(275480896)))]; + tensor var_2590_to_fp16 = const()[name = string("op_2590_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278757760)))]; + tensor linear_138_cast_fp16 = linear(bias = var_2590_to_fp16, weight = var_2589_to_fp16, x = var_2578_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_2593_to_fp16 = const()[name = string("op_2593_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(278760384)))]; + tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2593_to_fp16, x = var_2578_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor var_2597_to_fp16 = const()[name = string("op_2597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(282037248)))]; + tensor var_2598_to_fp16 = const()[name = string("op_2598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285314112)))]; + tensor linear_140_cast_fp16 = linear(bias = var_2598_to_fp16, weight = var_2597_to_fp16, x = var_2578_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor var_2606 = const()[name = string("op_2606"), val = tensor([1, 1500, 20, -1])]; + tensor var_2607_cast_fp16 = reshape(shape = var_2606, x = linear_138_cast_fp16)[name = string("op_2607_cast_fp16")]; + tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_95_cast_fp16 = mul(x = var_2607_cast_fp16, y = const_270_to_fp16)[name = string("q_95_cast_fp16")]; + tensor var_2613 = const()[name = string("op_2613"), val = tensor([1, 1500, 20, -1])]; + tensor var_2614_cast_fp16 = reshape(shape = var_2613, x = linear_139_cast_fp16)[name = string("op_2614_cast_fp16")]; + tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2614_cast_fp16, y = const_271_to_fp16)[name = string("k_95_cast_fp16")]; + tensor var_2620 = const()[name = string("op_2620"), val = tensor([1, 1500, 20, -1])]; + tensor var_2621_cast_fp16 = reshape(shape = var_2620, x = linear_140_cast_fp16)[name = string("op_2621_cast_fp16")]; + tensor var_2622 = const()[name = string("op_2622"), val = tensor([0, 2, -3, -1])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = k_95_cast_fp16)[name = string("transpose_130")]; + tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = q_95_cast_fp16)[name = string("transpose_131")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_78, y = transpose_79)[name = string("qk_47_cast_fp16")]; + tensor var_2626_cast_fp16 = softmax(axis = var_2562, x = qk_47_cast_fp16)[name = string("op_2626_cast_fp16")]; + bool var_2628_transpose_x_0 = const()[name = string("op_2628_transpose_x_0"), val = bool(false)]; + bool var_2628_transpose_y_0 = const()[name = string("op_2628_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2622, x = var_2621_cast_fp16)[name = string("transpose_129")]; + tensor var_2628_cast_fp16 = matmul(transpose_x = var_2628_transpose_x_0, transpose_y = var_2628_transpose_y_0, x = var_2626_cast_fp16, y = v_95_cast_fp16)[name = string("op_2628_cast_fp16")]; + tensor var_2629 = const()[name = string("op_2629"), val = tensor([0, 2, 1, 3])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([1, 1500, 1280])]; + tensor var_2630_cast_fp16 = transpose(perm = var_2629, x = var_2628_cast_fp16)[name = string("transpose_128")]; + tensor x_287_cast_fp16 = reshape(shape = concat_23, x = var_2630_cast_fp16)[name = string("x_287_cast_fp16")]; + tensor var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(285316736)))]; + tensor var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288593600)))]; + tensor linear_141_cast_fp16 = linear(bias = var_2635_to_fp16, weight = var_2634_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_2642_axes_0 = const()[name = string("op_2642_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288596224)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288598848)))]; + tensor var_2642_cast_fp16 = layer_norm(axes = var_2642_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2568_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2642_cast_fp16")]; + tensor var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(288601472)))]; + tensor var_2652_to_fp16 = const()[name = string("op_2652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301708736)))]; + tensor linear_142_cast_fp16 = linear(bias = var_2652_to_fp16, weight = var_2651_to_fp16, x = var_2642_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")]; + tensor x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(301719040)))]; + tensor var_2658_to_fp16 = const()[name = string("op_2658_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314826304)))]; + tensor linear_143_cast_fp16 = linear(bias = var_2658_to_fp16, weight = var_2657_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_295_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_295_cast_fp16")]; + int32 var_2668 = const()[name = string("op_2668"), val = int32(-1)]; + tensor var_2684_axes_0 = const()[name = string("op_2684_axes_0"), val = tensor([-1])]; + tensor blocks_24_attn_ln_weight_to_fp16 = const()[name = string("blocks_24_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314828928)))]; + tensor blocks_24_attn_ln_bias_to_fp16 = const()[name = string("blocks_24_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314831552)))]; + fp16 var_2674_to_fp16 = const()[name = string("op_2674_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2684_cast_fp16 = layer_norm(axes = var_2684_axes_0, beta = blocks_24_attn_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_attn_ln_weight_to_fp16, x = x_295_cast_fp16)[name = string("op_2684_cast_fp16")]; + tensor var_2695_to_fp16 = const()[name = string("op_2695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(314834176)))]; + tensor var_2696_to_fp16 = const()[name = string("op_2696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318111040)))]; + tensor linear_144_cast_fp16 = linear(bias = var_2696_to_fp16, weight = var_2695_to_fp16, x = var_2684_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_2699_to_fp16 = const()[name = string("op_2699_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(318113664)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2699_to_fp16, x = var_2684_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor var_2703_to_fp16 = const()[name = string("op_2703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(321390528)))]; + tensor var_2704_to_fp16 = const()[name = string("op_2704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324667392)))]; + tensor linear_146_cast_fp16 = linear(bias = var_2704_to_fp16, weight = var_2703_to_fp16, x = var_2684_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor var_2712 = const()[name = string("op_2712"), val = tensor([1, 1500, 20, -1])]; + tensor var_2713_cast_fp16 = reshape(shape = var_2712, x = linear_144_cast_fp16)[name = string("op_2713_cast_fp16")]; + tensor const_272_to_fp16 = const()[name = string("const_272_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_99_cast_fp16 = mul(x = var_2713_cast_fp16, y = const_272_to_fp16)[name = string("q_99_cast_fp16")]; + tensor var_2719 = const()[name = string("op_2719"), val = tensor([1, 1500, 20, -1])]; + tensor var_2720_cast_fp16 = reshape(shape = var_2719, x = linear_145_cast_fp16)[name = string("op_2720_cast_fp16")]; + tensor const_273_to_fp16 = const()[name = string("const_273_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2720_cast_fp16, y = const_273_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2726 = const()[name = string("op_2726"), val = tensor([1, 1500, 20, -1])]; + tensor var_2727_cast_fp16 = reshape(shape = var_2726, x = linear_146_cast_fp16)[name = string("op_2727_cast_fp16")]; + tensor var_2728 = const()[name = string("op_2728"), val = tensor([0, 2, -3, -1])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = k_99_cast_fp16)[name = string("transpose_126")]; + tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = q_99_cast_fp16)[name = string("transpose_127")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_80, y = transpose_81)[name = string("qk_49_cast_fp16")]; + tensor var_2732_cast_fp16 = softmax(axis = var_2668, x = qk_49_cast_fp16)[name = string("op_2732_cast_fp16")]; + bool var_2734_transpose_x_0 = const()[name = string("op_2734_transpose_x_0"), val = bool(false)]; + bool var_2734_transpose_y_0 = const()[name = string("op_2734_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2728, x = var_2727_cast_fp16)[name = string("transpose_125")]; + tensor var_2734_cast_fp16 = matmul(transpose_x = var_2734_transpose_x_0, transpose_y = var_2734_transpose_y_0, x = var_2732_cast_fp16, y = v_99_cast_fp16)[name = string("op_2734_cast_fp16")]; + tensor var_2735 = const()[name = string("op_2735"), val = tensor([0, 2, 1, 3])]; + tensor concat_24 = const()[name = string("concat_24"), val = tensor([1, 1500, 1280])]; + tensor var_2736_cast_fp16 = transpose(perm = var_2735, x = var_2734_cast_fp16)[name = string("transpose_124")]; + tensor x_299_cast_fp16 = reshape(shape = concat_24, x = var_2736_cast_fp16)[name = string("x_299_cast_fp16")]; + tensor var_2740_to_fp16 = const()[name = string("op_2740_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(324670016)))]; + tensor var_2741_to_fp16 = const()[name = string("op_2741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327946880)))]; + tensor linear_147_cast_fp16 = linear(bias = var_2741_to_fp16, weight = var_2740_to_fp16, x = x_299_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor x_301_cast_fp16 = add(x = x_295_cast_fp16, y = linear_147_cast_fp16)[name = string("x_301_cast_fp16")]; + tensor var_2748_axes_0 = const()[name = string("op_2748_axes_0"), val = tensor([-1])]; + tensor blocks_24_mlp_ln_weight_to_fp16 = const()[name = string("blocks_24_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327949504)))]; + tensor blocks_24_mlp_ln_bias_to_fp16 = const()[name = string("blocks_24_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327952128)))]; + tensor var_2748_cast_fp16 = layer_norm(axes = var_2748_axes_0, beta = blocks_24_mlp_ln_bias_to_fp16, epsilon = var_2674_to_fp16, gamma = blocks_24_mlp_ln_weight_to_fp16, x = x_301_cast_fp16)[name = string("op_2748_cast_fp16")]; + tensor var_2757_to_fp16 = const()[name = string("op_2757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(327954752)))]; + tensor var_2758_to_fp16 = const()[name = string("op_2758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341062016)))]; + tensor linear_148_cast_fp16 = linear(bias = var_2758_to_fp16, weight = var_2757_to_fp16, x = var_2748_cast_fp16)[name = string("linear_148_cast_fp16")]; + string x_305_mode_0 = const()[name = string("x_305_mode_0"), val = string("EXACT")]; + tensor x_305_cast_fp16 = gelu(mode = x_305_mode_0, x = linear_148_cast_fp16)[name = string("x_305_cast_fp16")]; + tensor var_2763_to_fp16 = const()[name = string("op_2763_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(341072320)))]; + tensor var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354179584)))]; + tensor linear_149_cast_fp16 = linear(bias = var_2764_to_fp16, weight = var_2763_to_fp16, x = x_305_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor x_307_cast_fp16 = add(x = x_301_cast_fp16, y = linear_149_cast_fp16)[name = string("x_307_cast_fp16")]; + int32 var_2774 = const()[name = string("op_2774"), val = int32(-1)]; + tensor var_2790_axes_0 = const()[name = string("op_2790_axes_0"), val = tensor([-1])]; + tensor blocks_25_attn_ln_weight_to_fp16 = const()[name = string("blocks_25_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354182208)))]; + tensor blocks_25_attn_ln_bias_to_fp16 = const()[name = string("blocks_25_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354184832)))]; + fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2790_cast_fp16 = layer_norm(axes = var_2790_axes_0, beta = blocks_25_attn_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_attn_ln_weight_to_fp16, x = x_307_cast_fp16)[name = string("op_2790_cast_fp16")]; + tensor var_2801_to_fp16 = const()[name = string("op_2801_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(354187456)))]; + tensor var_2802_to_fp16 = const()[name = string("op_2802_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357464320)))]; + tensor linear_150_cast_fp16 = linear(bias = var_2802_to_fp16, weight = var_2801_to_fp16, x = var_2790_cast_fp16)[name = string("linear_150_cast_fp16")]; + tensor var_2805_to_fp16 = const()[name = string("op_2805_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(357466944)))]; + tensor linear_151_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2805_to_fp16, x = var_2790_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor var_2809_to_fp16 = const()[name = string("op_2809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(360743808)))]; + tensor var_2810_to_fp16 = const()[name = string("op_2810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364020672)))]; + tensor linear_152_cast_fp16 = linear(bias = var_2810_to_fp16, weight = var_2809_to_fp16, x = var_2790_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor var_2818 = const()[name = string("op_2818"), val = tensor([1, 1500, 20, -1])]; + tensor var_2819_cast_fp16 = reshape(shape = var_2818, x = linear_150_cast_fp16)[name = string("op_2819_cast_fp16")]; + tensor const_274_to_fp16 = const()[name = string("const_274_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_103_cast_fp16 = mul(x = var_2819_cast_fp16, y = const_274_to_fp16)[name = string("q_103_cast_fp16")]; + tensor var_2825 = const()[name = string("op_2825"), val = tensor([1, 1500, 20, -1])]; + tensor var_2826_cast_fp16 = reshape(shape = var_2825, x = linear_151_cast_fp16)[name = string("op_2826_cast_fp16")]; + tensor const_275_to_fp16 = const()[name = string("const_275_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_103_cast_fp16 = mul(x = var_2826_cast_fp16, y = const_275_to_fp16)[name = string("k_103_cast_fp16")]; + tensor var_2832 = const()[name = string("op_2832"), val = tensor([1, 1500, 20, -1])]; + tensor var_2833_cast_fp16 = reshape(shape = var_2832, x = linear_152_cast_fp16)[name = string("op_2833_cast_fp16")]; + tensor var_2834 = const()[name = string("op_2834"), val = tensor([0, 2, -3, -1])]; + bool qk_51_transpose_x_0 = const()[name = string("qk_51_transpose_x_0"), val = bool(false)]; + bool qk_51_transpose_y_0 = const()[name = string("qk_51_transpose_y_0"), val = bool(false)]; + tensor transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = k_103_cast_fp16)[name = string("transpose_122")]; + tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = q_103_cast_fp16)[name = string("transpose_123")]; + tensor qk_51_cast_fp16 = matmul(transpose_x = qk_51_transpose_x_0, transpose_y = qk_51_transpose_y_0, x = transpose_82, y = transpose_83)[name = string("qk_51_cast_fp16")]; + tensor var_2838_cast_fp16 = softmax(axis = var_2774, x = qk_51_cast_fp16)[name = string("op_2838_cast_fp16")]; + bool var_2840_transpose_x_0 = const()[name = string("op_2840_transpose_x_0"), val = bool(false)]; + bool var_2840_transpose_y_0 = const()[name = string("op_2840_transpose_y_0"), val = bool(false)]; + tensor v_103_cast_fp16 = transpose(perm = var_2834, x = var_2833_cast_fp16)[name = string("transpose_121")]; + tensor var_2840_cast_fp16 = matmul(transpose_x = var_2840_transpose_x_0, transpose_y = var_2840_transpose_y_0, x = var_2838_cast_fp16, y = v_103_cast_fp16)[name = string("op_2840_cast_fp16")]; + tensor var_2841 = const()[name = string("op_2841"), val = tensor([0, 2, 1, 3])]; + tensor concat_25 = const()[name = string("concat_25"), val = tensor([1, 1500, 1280])]; + tensor var_2842_cast_fp16 = transpose(perm = var_2841, x = var_2840_cast_fp16)[name = string("transpose_120")]; + tensor x_311_cast_fp16 = reshape(shape = concat_25, x = var_2842_cast_fp16)[name = string("x_311_cast_fp16")]; + tensor var_2846_to_fp16 = const()[name = string("op_2846_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(364023296)))]; + tensor var_2847_to_fp16 = const()[name = string("op_2847_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367300160)))]; + tensor linear_153_cast_fp16 = linear(bias = var_2847_to_fp16, weight = var_2846_to_fp16, x = x_311_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor x_313_cast_fp16 = add(x = x_307_cast_fp16, y = linear_153_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_2854_axes_0 = const()[name = string("op_2854_axes_0"), val = tensor([-1])]; + tensor blocks_25_mlp_ln_weight_to_fp16 = const()[name = string("blocks_25_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367302784)))]; + tensor blocks_25_mlp_ln_bias_to_fp16 = const()[name = string("blocks_25_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367305408)))]; + tensor var_2854_cast_fp16 = layer_norm(axes = var_2854_axes_0, beta = blocks_25_mlp_ln_bias_to_fp16, epsilon = var_2780_to_fp16, gamma = blocks_25_mlp_ln_weight_to_fp16, x = x_313_cast_fp16)[name = string("op_2854_cast_fp16")]; + tensor var_2863_to_fp16 = const()[name = string("op_2863_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(367308032)))]; + tensor var_2864_to_fp16 = const()[name = string("op_2864_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380415296)))]; + tensor linear_154_cast_fp16 = linear(bias = var_2864_to_fp16, weight = var_2863_to_fp16, x = var_2854_cast_fp16)[name = string("linear_154_cast_fp16")]; + string x_317_mode_0 = const()[name = string("x_317_mode_0"), val = string("EXACT")]; + tensor x_317_cast_fp16 = gelu(mode = x_317_mode_0, x = linear_154_cast_fp16)[name = string("x_317_cast_fp16")]; + tensor var_2869_to_fp16 = const()[name = string("op_2869_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(380425600)))]; + tensor var_2870_to_fp16 = const()[name = string("op_2870_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393532864)))]; + tensor linear_155_cast_fp16 = linear(bias = var_2870_to_fp16, weight = var_2869_to_fp16, x = x_317_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor x_319_cast_fp16 = add(x = x_313_cast_fp16, y = linear_155_cast_fp16)[name = string("x_319_cast_fp16")]; + int32 var_2880 = const()[name = string("op_2880"), val = int32(-1)]; + tensor var_2896_axes_0 = const()[name = string("op_2896_axes_0"), val = tensor([-1])]; + tensor blocks_26_attn_ln_weight_to_fp16 = const()[name = string("blocks_26_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393535488)))]; + tensor blocks_26_attn_ln_bias_to_fp16 = const()[name = string("blocks_26_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393538112)))]; + fp16 var_2886_to_fp16 = const()[name = string("op_2886_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2896_cast_fp16 = layer_norm(axes = var_2896_axes_0, beta = blocks_26_attn_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_attn_ln_weight_to_fp16, x = x_319_cast_fp16)[name = string("op_2896_cast_fp16")]; + tensor var_2907_to_fp16 = const()[name = string("op_2907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(393540736)))]; + tensor var_2908_to_fp16 = const()[name = string("op_2908_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396817600)))]; + tensor linear_156_cast_fp16 = linear(bias = var_2908_to_fp16, weight = var_2907_to_fp16, x = var_2896_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor var_2911_to_fp16 = const()[name = string("op_2911_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(396820224)))]; + tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2911_to_fp16, x = var_2896_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor var_2915_to_fp16 = const()[name = string("op_2915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(400097088)))]; + tensor var_2916_to_fp16 = const()[name = string("op_2916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403373952)))]; + tensor linear_158_cast_fp16 = linear(bias = var_2916_to_fp16, weight = var_2915_to_fp16, x = var_2896_cast_fp16)[name = string("linear_158_cast_fp16")]; + tensor var_2924 = const()[name = string("op_2924"), val = tensor([1, 1500, 20, -1])]; + tensor var_2925_cast_fp16 = reshape(shape = var_2924, x = linear_156_cast_fp16)[name = string("op_2925_cast_fp16")]; + tensor const_276_to_fp16 = const()[name = string("const_276_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_107_cast_fp16 = mul(x = var_2925_cast_fp16, y = const_276_to_fp16)[name = string("q_107_cast_fp16")]; + tensor var_2931 = const()[name = string("op_2931"), val = tensor([1, 1500, 20, -1])]; + tensor var_2932_cast_fp16 = reshape(shape = var_2931, x = linear_157_cast_fp16)[name = string("op_2932_cast_fp16")]; + tensor const_277_to_fp16 = const()[name = string("const_277_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_107_cast_fp16 = mul(x = var_2932_cast_fp16, y = const_277_to_fp16)[name = string("k_107_cast_fp16")]; + tensor var_2938 = const()[name = string("op_2938"), val = tensor([1, 1500, 20, -1])]; + tensor var_2939_cast_fp16 = reshape(shape = var_2938, x = linear_158_cast_fp16)[name = string("op_2939_cast_fp16")]; + tensor var_2940 = const()[name = string("op_2940"), val = tensor([0, 2, -3, -1])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = k_107_cast_fp16)[name = string("transpose_118")]; + tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = q_107_cast_fp16)[name = string("transpose_119")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_84, y = transpose_85)[name = string("qk_53_cast_fp16")]; + tensor var_2944_cast_fp16 = softmax(axis = var_2880, x = qk_53_cast_fp16)[name = string("op_2944_cast_fp16")]; + bool var_2946_transpose_x_0 = const()[name = string("op_2946_transpose_x_0"), val = bool(false)]; + bool var_2946_transpose_y_0 = const()[name = string("op_2946_transpose_y_0"), val = bool(false)]; + tensor v_107_cast_fp16 = transpose(perm = var_2940, x = var_2939_cast_fp16)[name = string("transpose_117")]; + tensor var_2946_cast_fp16 = matmul(transpose_x = var_2946_transpose_x_0, transpose_y = var_2946_transpose_y_0, x = var_2944_cast_fp16, y = v_107_cast_fp16)[name = string("op_2946_cast_fp16")]; + tensor var_2947 = const()[name = string("op_2947"), val = tensor([0, 2, 1, 3])]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([1, 1500, 1280])]; + tensor var_2948_cast_fp16 = transpose(perm = var_2947, x = var_2946_cast_fp16)[name = string("transpose_116")]; + tensor x_323_cast_fp16 = reshape(shape = concat_26, x = var_2948_cast_fp16)[name = string("x_323_cast_fp16")]; + tensor var_2952_to_fp16 = const()[name = string("op_2952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(403376576)))]; + tensor var_2953_to_fp16 = const()[name = string("op_2953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406653440)))]; + tensor linear_159_cast_fp16 = linear(bias = var_2953_to_fp16, weight = var_2952_to_fp16, x = x_323_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor x_325_cast_fp16 = add(x = x_319_cast_fp16, y = linear_159_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_2960_axes_0 = const()[name = string("op_2960_axes_0"), val = tensor([-1])]; + tensor blocks_26_mlp_ln_weight_to_fp16 = const()[name = string("blocks_26_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406656064)))]; + tensor blocks_26_mlp_ln_bias_to_fp16 = const()[name = string("blocks_26_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406658688)))]; + tensor var_2960_cast_fp16 = layer_norm(axes = var_2960_axes_0, beta = blocks_26_mlp_ln_bias_to_fp16, epsilon = var_2886_to_fp16, gamma = blocks_26_mlp_ln_weight_to_fp16, x = x_325_cast_fp16)[name = string("op_2960_cast_fp16")]; + tensor var_2969_to_fp16 = const()[name = string("op_2969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(406661312)))]; + tensor var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419768576)))]; + tensor linear_160_cast_fp16 = linear(bias = var_2970_to_fp16, weight = var_2969_to_fp16, x = var_2960_cast_fp16)[name = string("linear_160_cast_fp16")]; + string x_329_mode_0 = const()[name = string("x_329_mode_0"), val = string("EXACT")]; + tensor x_329_cast_fp16 = gelu(mode = x_329_mode_0, x = linear_160_cast_fp16)[name = string("x_329_cast_fp16")]; + tensor var_2975_to_fp16 = const()[name = string("op_2975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(419778880)))]; + tensor var_2976_to_fp16 = const()[name = string("op_2976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432886144)))]; + tensor linear_161_cast_fp16 = linear(bias = var_2976_to_fp16, weight = var_2975_to_fp16, x = x_329_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor x_331_cast_fp16 = add(x = x_325_cast_fp16, y = linear_161_cast_fp16)[name = string("x_331_cast_fp16")]; + int32 var_2986 = const()[name = string("op_2986"), val = int32(-1)]; + tensor var_3002_axes_0 = const()[name = string("op_3002_axes_0"), val = tensor([-1])]; + tensor blocks_27_attn_ln_weight_to_fp16 = const()[name = string("blocks_27_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432888768)))]; + tensor blocks_27_attn_ln_bias_to_fp16 = const()[name = string("blocks_27_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432891392)))]; + fp16 var_2992_to_fp16 = const()[name = string("op_2992_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3002_cast_fp16 = layer_norm(axes = var_3002_axes_0, beta = blocks_27_attn_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_attn_ln_weight_to_fp16, x = x_331_cast_fp16)[name = string("op_3002_cast_fp16")]; + tensor var_3013_to_fp16 = const()[name = string("op_3013_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(432894016)))]; + tensor var_3014_to_fp16 = const()[name = string("op_3014_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436170880)))]; + tensor linear_162_cast_fp16 = linear(bias = var_3014_to_fp16, weight = var_3013_to_fp16, x = var_3002_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor var_3017_to_fp16 = const()[name = string("op_3017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(436173504)))]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3017_to_fp16, x = var_3002_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor var_3021_to_fp16 = const()[name = string("op_3021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(439450368)))]; + tensor var_3022_to_fp16 = const()[name = string("op_3022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442727232)))]; + tensor linear_164_cast_fp16 = linear(bias = var_3022_to_fp16, weight = var_3021_to_fp16, x = var_3002_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor var_3030 = const()[name = string("op_3030"), val = tensor([1, 1500, 20, -1])]; + tensor var_3031_cast_fp16 = reshape(shape = var_3030, x = linear_162_cast_fp16)[name = string("op_3031_cast_fp16")]; + tensor const_278_to_fp16 = const()[name = string("const_278_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_111_cast_fp16 = mul(x = var_3031_cast_fp16, y = const_278_to_fp16)[name = string("q_111_cast_fp16")]; + tensor var_3037 = const()[name = string("op_3037"), val = tensor([1, 1500, 20, -1])]; + tensor var_3038_cast_fp16 = reshape(shape = var_3037, x = linear_163_cast_fp16)[name = string("op_3038_cast_fp16")]; + tensor const_279_to_fp16 = const()[name = string("const_279_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_111_cast_fp16 = mul(x = var_3038_cast_fp16, y = const_279_to_fp16)[name = string("k_111_cast_fp16")]; + tensor var_3044 = const()[name = string("op_3044"), val = tensor([1, 1500, 20, -1])]; + tensor var_3045_cast_fp16 = reshape(shape = var_3044, x = linear_164_cast_fp16)[name = string("op_3045_cast_fp16")]; + tensor var_3046 = const()[name = string("op_3046"), val = tensor([0, 2, -3, -1])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = k_111_cast_fp16)[name = string("transpose_114")]; + tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = q_111_cast_fp16)[name = string("transpose_115")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_86, y = transpose_87)[name = string("qk_55_cast_fp16")]; + tensor var_3050_cast_fp16 = softmax(axis = var_2986, x = qk_55_cast_fp16)[name = string("op_3050_cast_fp16")]; + bool var_3052_transpose_x_0 = const()[name = string("op_3052_transpose_x_0"), val = bool(false)]; + bool var_3052_transpose_y_0 = const()[name = string("op_3052_transpose_y_0"), val = bool(false)]; + tensor v_111_cast_fp16 = transpose(perm = var_3046, x = var_3045_cast_fp16)[name = string("transpose_113")]; + tensor var_3052_cast_fp16 = matmul(transpose_x = var_3052_transpose_x_0, transpose_y = var_3052_transpose_y_0, x = var_3050_cast_fp16, y = v_111_cast_fp16)[name = string("op_3052_cast_fp16")]; + tensor var_3053 = const()[name = string("op_3053"), val = tensor([0, 2, 1, 3])]; + tensor concat_27 = const()[name = string("concat_27"), val = tensor([1, 1500, 1280])]; + tensor var_3054_cast_fp16 = transpose(perm = var_3053, x = var_3052_cast_fp16)[name = string("transpose_112")]; + tensor x_335_cast_fp16 = reshape(shape = concat_27, x = var_3054_cast_fp16)[name = string("x_335_cast_fp16")]; + tensor var_3058_to_fp16 = const()[name = string("op_3058_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(442729856)))]; + tensor var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446006720)))]; + tensor linear_165_cast_fp16 = linear(bias = var_3059_to_fp16, weight = var_3058_to_fp16, x = x_335_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_165_cast_fp16)[name = string("x_337_cast_fp16")]; + tensor var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor([-1])]; + tensor blocks_27_mlp_ln_weight_to_fp16 = const()[name = string("blocks_27_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446009344)))]; + tensor blocks_27_mlp_ln_bias_to_fp16 = const()[name = string("blocks_27_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446011968)))]; + tensor var_3066_cast_fp16 = layer_norm(axes = var_3066_axes_0, beta = blocks_27_mlp_ln_bias_to_fp16, epsilon = var_2992_to_fp16, gamma = blocks_27_mlp_ln_weight_to_fp16, x = x_337_cast_fp16)[name = string("op_3066_cast_fp16")]; + tensor var_3075_to_fp16 = const()[name = string("op_3075_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(446014592)))]; + tensor var_3076_to_fp16 = const()[name = string("op_3076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459121856)))]; + tensor linear_166_cast_fp16 = linear(bias = var_3076_to_fp16, weight = var_3075_to_fp16, x = var_3066_cast_fp16)[name = string("linear_166_cast_fp16")]; + string x_341_mode_0 = const()[name = string("x_341_mode_0"), val = string("EXACT")]; + tensor x_341_cast_fp16 = gelu(mode = x_341_mode_0, x = linear_166_cast_fp16)[name = string("x_341_cast_fp16")]; + tensor var_3081_to_fp16 = const()[name = string("op_3081_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(459132160)))]; + tensor var_3082_to_fp16 = const()[name = string("op_3082_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472239424)))]; + tensor linear_167_cast_fp16 = linear(bias = var_3082_to_fp16, weight = var_3081_to_fp16, x = x_341_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor x_343_cast_fp16 = add(x = x_337_cast_fp16, y = linear_167_cast_fp16)[name = string("x_343_cast_fp16")]; + int32 var_3092 = const()[name = string("op_3092"), val = int32(-1)]; + tensor var_3108_axes_0 = const()[name = string("op_3108_axes_0"), val = tensor([-1])]; + tensor blocks_28_attn_ln_weight_to_fp16 = const()[name = string("blocks_28_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472242048)))]; + tensor blocks_28_attn_ln_bias_to_fp16 = const()[name = string("blocks_28_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472244672)))]; + fp16 var_3098_to_fp16 = const()[name = string("op_3098_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3108_cast_fp16 = layer_norm(axes = var_3108_axes_0, beta = blocks_28_attn_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_attn_ln_weight_to_fp16, x = x_343_cast_fp16)[name = string("op_3108_cast_fp16")]; + tensor var_3119_to_fp16 = const()[name = string("op_3119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(472247296)))]; + tensor var_3120_to_fp16 = const()[name = string("op_3120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475524160)))]; + tensor linear_168_cast_fp16 = linear(bias = var_3120_to_fp16, weight = var_3119_to_fp16, x = var_3108_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor var_3123_to_fp16 = const()[name = string("op_3123_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(475526784)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3123_to_fp16, x = var_3108_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor var_3127_to_fp16 = const()[name = string("op_3127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(478803648)))]; + tensor var_3128_to_fp16 = const()[name = string("op_3128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482080512)))]; + tensor linear_170_cast_fp16 = linear(bias = var_3128_to_fp16, weight = var_3127_to_fp16, x = var_3108_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor var_3136 = const()[name = string("op_3136"), val = tensor([1, 1500, 20, -1])]; + tensor var_3137_cast_fp16 = reshape(shape = var_3136, x = linear_168_cast_fp16)[name = string("op_3137_cast_fp16")]; + tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_115_cast_fp16 = mul(x = var_3137_cast_fp16, y = const_280_to_fp16)[name = string("q_115_cast_fp16")]; + tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 1500, 20, -1])]; + tensor var_3144_cast_fp16 = reshape(shape = var_3143, x = linear_169_cast_fp16)[name = string("op_3144_cast_fp16")]; + tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_3144_cast_fp16, y = const_281_to_fp16)[name = string("k_115_cast_fp16")]; + tensor var_3150 = const()[name = string("op_3150"), val = tensor([1, 1500, 20, -1])]; + tensor var_3151_cast_fp16 = reshape(shape = var_3150, x = linear_170_cast_fp16)[name = string("op_3151_cast_fp16")]; + tensor var_3152 = const()[name = string("op_3152"), val = tensor([0, 2, -3, -1])]; + bool qk_57_transpose_x_0 = const()[name = string("qk_57_transpose_x_0"), val = bool(false)]; + bool qk_57_transpose_y_0 = const()[name = string("qk_57_transpose_y_0"), val = bool(false)]; + tensor transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = k_115_cast_fp16)[name = string("transpose_110")]; + tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = q_115_cast_fp16)[name = string("transpose_111")]; + tensor qk_57_cast_fp16 = matmul(transpose_x = qk_57_transpose_x_0, transpose_y = qk_57_transpose_y_0, x = transpose_88, y = transpose_89)[name = string("qk_57_cast_fp16")]; + tensor var_3156_cast_fp16 = softmax(axis = var_3092, x = qk_57_cast_fp16)[name = string("op_3156_cast_fp16")]; + bool var_3158_transpose_x_0 = const()[name = string("op_3158_transpose_x_0"), val = bool(false)]; + bool var_3158_transpose_y_0 = const()[name = string("op_3158_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_3152, x = var_3151_cast_fp16)[name = string("transpose_109")]; + tensor var_3158_cast_fp16 = matmul(transpose_x = var_3158_transpose_x_0, transpose_y = var_3158_transpose_y_0, x = var_3156_cast_fp16, y = v_115_cast_fp16)[name = string("op_3158_cast_fp16")]; + tensor var_3159 = const()[name = string("op_3159"), val = tensor([0, 2, 1, 3])]; + tensor concat_28 = const()[name = string("concat_28"), val = tensor([1, 1500, 1280])]; + tensor var_3160_cast_fp16 = transpose(perm = var_3159, x = var_3158_cast_fp16)[name = string("transpose_108")]; + tensor x_347_cast_fp16 = reshape(shape = concat_28, x = var_3160_cast_fp16)[name = string("x_347_cast_fp16")]; + tensor var_3164_to_fp16 = const()[name = string("op_3164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(482083136)))]; + tensor var_3165_to_fp16 = const()[name = string("op_3165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485360000)))]; + tensor linear_171_cast_fp16 = linear(bias = var_3165_to_fp16, weight = var_3164_to_fp16, x = x_347_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor x_349_cast_fp16 = add(x = x_343_cast_fp16, y = linear_171_cast_fp16)[name = string("x_349_cast_fp16")]; + tensor var_3172_axes_0 = const()[name = string("op_3172_axes_0"), val = tensor([-1])]; + tensor blocks_28_mlp_ln_weight_to_fp16 = const()[name = string("blocks_28_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485362624)))]; + tensor blocks_28_mlp_ln_bias_to_fp16 = const()[name = string("blocks_28_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485365248)))]; + tensor var_3172_cast_fp16 = layer_norm(axes = var_3172_axes_0, beta = blocks_28_mlp_ln_bias_to_fp16, epsilon = var_3098_to_fp16, gamma = blocks_28_mlp_ln_weight_to_fp16, x = x_349_cast_fp16)[name = string("op_3172_cast_fp16")]; + tensor var_3181_to_fp16 = const()[name = string("op_3181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(485367872)))]; + tensor var_3182_to_fp16 = const()[name = string("op_3182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498475136)))]; + tensor linear_172_cast_fp16 = linear(bias = var_3182_to_fp16, weight = var_3181_to_fp16, x = var_3172_cast_fp16)[name = string("linear_172_cast_fp16")]; + string x_353_mode_0 = const()[name = string("x_353_mode_0"), val = string("EXACT")]; + tensor x_353_cast_fp16 = gelu(mode = x_353_mode_0, x = linear_172_cast_fp16)[name = string("x_353_cast_fp16")]; + tensor var_3187_to_fp16 = const()[name = string("op_3187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(498485440)))]; + tensor var_3188_to_fp16 = const()[name = string("op_3188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511592704)))]; + tensor linear_173_cast_fp16 = linear(bias = var_3188_to_fp16, weight = var_3187_to_fp16, x = x_353_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor x_355_cast_fp16 = add(x = x_349_cast_fp16, y = linear_173_cast_fp16)[name = string("x_355_cast_fp16")]; + int32 var_3198 = const()[name = string("op_3198"), val = int32(-1)]; + tensor var_3214_axes_0 = const()[name = string("op_3214_axes_0"), val = tensor([-1])]; + tensor blocks_29_attn_ln_weight_to_fp16 = const()[name = string("blocks_29_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511595328)))]; + tensor blocks_29_attn_ln_bias_to_fp16 = const()[name = string("blocks_29_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511597952)))]; + fp16 var_3204_to_fp16 = const()[name = string("op_3204_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3214_cast_fp16 = layer_norm(axes = var_3214_axes_0, beta = blocks_29_attn_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_attn_ln_weight_to_fp16, x = x_355_cast_fp16)[name = string("op_3214_cast_fp16")]; + tensor var_3225_to_fp16 = const()[name = string("op_3225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(511600576)))]; + tensor var_3226_to_fp16 = const()[name = string("op_3226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514877440)))]; + tensor linear_174_cast_fp16 = linear(bias = var_3226_to_fp16, weight = var_3225_to_fp16, x = var_3214_cast_fp16)[name = string("linear_174_cast_fp16")]; + tensor var_3229_to_fp16 = const()[name = string("op_3229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(514880064)))]; + tensor linear_175_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3229_to_fp16, x = var_3214_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor var_3233_to_fp16 = const()[name = string("op_3233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(518156928)))]; + tensor var_3234_to_fp16 = const()[name = string("op_3234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521433792)))]; + tensor linear_176_cast_fp16 = linear(bias = var_3234_to_fp16, weight = var_3233_to_fp16, x = var_3214_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor var_3242 = const()[name = string("op_3242"), val = tensor([1, 1500, 20, -1])]; + tensor var_3243_cast_fp16 = reshape(shape = var_3242, x = linear_174_cast_fp16)[name = string("op_3243_cast_fp16")]; + tensor const_282_to_fp16 = const()[name = string("const_282_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_119_cast_fp16 = mul(x = var_3243_cast_fp16, y = const_282_to_fp16)[name = string("q_119_cast_fp16")]; + tensor var_3249 = const()[name = string("op_3249"), val = tensor([1, 1500, 20, -1])]; + tensor var_3250_cast_fp16 = reshape(shape = var_3249, x = linear_175_cast_fp16)[name = string("op_3250_cast_fp16")]; + tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_119_cast_fp16 = mul(x = var_3250_cast_fp16, y = const_283_to_fp16)[name = string("k_119_cast_fp16")]; + tensor var_3256 = const()[name = string("op_3256"), val = tensor([1, 1500, 20, -1])]; + tensor var_3257_cast_fp16 = reshape(shape = var_3256, x = linear_176_cast_fp16)[name = string("op_3257_cast_fp16")]; + tensor var_3258 = const()[name = string("op_3258"), val = tensor([0, 2, -3, -1])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = k_119_cast_fp16)[name = string("transpose_106")]; + tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = q_119_cast_fp16)[name = string("transpose_107")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_90, y = transpose_91)[name = string("qk_59_cast_fp16")]; + tensor var_3262_cast_fp16 = softmax(axis = var_3198, x = qk_59_cast_fp16)[name = string("op_3262_cast_fp16")]; + bool var_3264_transpose_x_0 = const()[name = string("op_3264_transpose_x_0"), val = bool(false)]; + bool var_3264_transpose_y_0 = const()[name = string("op_3264_transpose_y_0"), val = bool(false)]; + tensor v_119_cast_fp16 = transpose(perm = var_3258, x = var_3257_cast_fp16)[name = string("transpose_105")]; + tensor var_3264_cast_fp16 = matmul(transpose_x = var_3264_transpose_x_0, transpose_y = var_3264_transpose_y_0, x = var_3262_cast_fp16, y = v_119_cast_fp16)[name = string("op_3264_cast_fp16")]; + tensor var_3265 = const()[name = string("op_3265"), val = tensor([0, 2, 1, 3])]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([1, 1500, 1280])]; + tensor var_3266_cast_fp16 = transpose(perm = var_3265, x = var_3264_cast_fp16)[name = string("transpose_104")]; + tensor x_359_cast_fp16 = reshape(shape = concat_29, x = var_3266_cast_fp16)[name = string("x_359_cast_fp16")]; + tensor var_3270_to_fp16 = const()[name = string("op_3270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(521436416)))]; + tensor var_3271_to_fp16 = const()[name = string("op_3271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524713280)))]; + tensor linear_177_cast_fp16 = linear(bias = var_3271_to_fp16, weight = var_3270_to_fp16, x = x_359_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor x_361_cast_fp16 = add(x = x_355_cast_fp16, y = linear_177_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_3278_axes_0 = const()[name = string("op_3278_axes_0"), val = tensor([-1])]; + tensor blocks_29_mlp_ln_weight_to_fp16 = const()[name = string("blocks_29_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524715904)))]; + tensor blocks_29_mlp_ln_bias_to_fp16 = const()[name = string("blocks_29_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524718528)))]; + tensor var_3278_cast_fp16 = layer_norm(axes = var_3278_axes_0, beta = blocks_29_mlp_ln_bias_to_fp16, epsilon = var_3204_to_fp16, gamma = blocks_29_mlp_ln_weight_to_fp16, x = x_361_cast_fp16)[name = string("op_3278_cast_fp16")]; + tensor var_3287_to_fp16 = const()[name = string("op_3287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(524721152)))]; + tensor var_3288_to_fp16 = const()[name = string("op_3288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537828416)))]; + tensor linear_178_cast_fp16 = linear(bias = var_3288_to_fp16, weight = var_3287_to_fp16, x = var_3278_cast_fp16)[name = string("linear_178_cast_fp16")]; + string x_365_mode_0 = const()[name = string("x_365_mode_0"), val = string("EXACT")]; + tensor x_365_cast_fp16 = gelu(mode = x_365_mode_0, x = linear_178_cast_fp16)[name = string("x_365_cast_fp16")]; + tensor var_3293_to_fp16 = const()[name = string("op_3293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(537838720)))]; + tensor var_3294_to_fp16 = const()[name = string("op_3294_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550945984)))]; + tensor linear_179_cast_fp16 = linear(bias = var_3294_to_fp16, weight = var_3293_to_fp16, x = x_365_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor x_367_cast_fp16 = add(x = x_361_cast_fp16, y = linear_179_cast_fp16)[name = string("x_367_cast_fp16")]; + int32 var_3304 = const()[name = string("op_3304"), val = int32(-1)]; + tensor var_3320_axes_0 = const()[name = string("op_3320_axes_0"), val = tensor([-1])]; + tensor blocks_30_attn_ln_weight_to_fp16 = const()[name = string("blocks_30_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550948608)))]; + tensor blocks_30_attn_ln_bias_to_fp16 = const()[name = string("blocks_30_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550951232)))]; + fp16 var_3310_to_fp16 = const()[name = string("op_3310_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3320_cast_fp16 = layer_norm(axes = var_3320_axes_0, beta = blocks_30_attn_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_attn_ln_weight_to_fp16, x = x_367_cast_fp16)[name = string("op_3320_cast_fp16")]; + tensor var_3331_to_fp16 = const()[name = string("op_3331_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(550953856)))]; + tensor var_3332_to_fp16 = const()[name = string("op_3332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554230720)))]; + tensor linear_180_cast_fp16 = linear(bias = var_3332_to_fp16, weight = var_3331_to_fp16, x = var_3320_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor var_3335_to_fp16 = const()[name = string("op_3335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(554233344)))]; + tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3335_to_fp16, x = var_3320_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor var_3339_to_fp16 = const()[name = string("op_3339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(557510208)))]; + tensor var_3340_to_fp16 = const()[name = string("op_3340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560787072)))]; + tensor linear_182_cast_fp16 = linear(bias = var_3340_to_fp16, weight = var_3339_to_fp16, x = var_3320_cast_fp16)[name = string("linear_182_cast_fp16")]; + tensor var_3348 = const()[name = string("op_3348"), val = tensor([1, 1500, 20, -1])]; + tensor var_3349_cast_fp16 = reshape(shape = var_3348, x = linear_180_cast_fp16)[name = string("op_3349_cast_fp16")]; + tensor const_284_to_fp16 = const()[name = string("const_284_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_123_cast_fp16 = mul(x = var_3349_cast_fp16, y = const_284_to_fp16)[name = string("q_123_cast_fp16")]; + tensor var_3355 = const()[name = string("op_3355"), val = tensor([1, 1500, 20, -1])]; + tensor var_3356_cast_fp16 = reshape(shape = var_3355, x = linear_181_cast_fp16)[name = string("op_3356_cast_fp16")]; + tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_123_cast_fp16 = mul(x = var_3356_cast_fp16, y = const_285_to_fp16)[name = string("k_123_cast_fp16")]; + tensor var_3362 = const()[name = string("op_3362"), val = tensor([1, 1500, 20, -1])]; + tensor var_3363_cast_fp16 = reshape(shape = var_3362, x = linear_182_cast_fp16)[name = string("op_3363_cast_fp16")]; + tensor var_3364 = const()[name = string("op_3364"), val = tensor([0, 2, -3, -1])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = k_123_cast_fp16)[name = string("transpose_102")]; + tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = q_123_cast_fp16)[name = string("transpose_103")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_92, y = transpose_93)[name = string("qk_61_cast_fp16")]; + tensor var_3368_cast_fp16 = softmax(axis = var_3304, x = qk_61_cast_fp16)[name = string("op_3368_cast_fp16")]; + bool var_3370_transpose_x_0 = const()[name = string("op_3370_transpose_x_0"), val = bool(false)]; + bool var_3370_transpose_y_0 = const()[name = string("op_3370_transpose_y_0"), val = bool(false)]; + tensor v_123_cast_fp16 = transpose(perm = var_3364, x = var_3363_cast_fp16)[name = string("transpose_101")]; + tensor var_3370_cast_fp16 = matmul(transpose_x = var_3370_transpose_x_0, transpose_y = var_3370_transpose_y_0, x = var_3368_cast_fp16, y = v_123_cast_fp16)[name = string("op_3370_cast_fp16")]; + tensor var_3371 = const()[name = string("op_3371"), val = tensor([0, 2, 1, 3])]; + tensor concat_30 = const()[name = string("concat_30"), val = tensor([1, 1500, 1280])]; + tensor var_3372_cast_fp16 = transpose(perm = var_3371, x = var_3370_cast_fp16)[name = string("transpose_100")]; + tensor x_371_cast_fp16 = reshape(shape = concat_30, x = var_3372_cast_fp16)[name = string("x_371_cast_fp16")]; + tensor var_3376_to_fp16 = const()[name = string("op_3376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(560789696)))]; + tensor var_3377_to_fp16 = const()[name = string("op_3377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564066560)))]; + tensor linear_183_cast_fp16 = linear(bias = var_3377_to_fp16, weight = var_3376_to_fp16, x = x_371_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor x_373_cast_fp16 = add(x = x_367_cast_fp16, y = linear_183_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_3384_axes_0 = const()[name = string("op_3384_axes_0"), val = tensor([-1])]; + tensor blocks_30_mlp_ln_weight_to_fp16 = const()[name = string("blocks_30_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564069184)))]; + tensor blocks_30_mlp_ln_bias_to_fp16 = const()[name = string("blocks_30_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564071808)))]; + tensor var_3384_cast_fp16 = layer_norm(axes = var_3384_axes_0, beta = blocks_30_mlp_ln_bias_to_fp16, epsilon = var_3310_to_fp16, gamma = blocks_30_mlp_ln_weight_to_fp16, x = x_373_cast_fp16)[name = string("op_3384_cast_fp16")]; + tensor var_3393_to_fp16 = const()[name = string("op_3393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(564074432)))]; + tensor var_3394_to_fp16 = const()[name = string("op_3394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577181696)))]; + tensor linear_184_cast_fp16 = linear(bias = var_3394_to_fp16, weight = var_3393_to_fp16, x = var_3384_cast_fp16)[name = string("linear_184_cast_fp16")]; + string x_377_mode_0 = const()[name = string("x_377_mode_0"), val = string("EXACT")]; + tensor x_377_cast_fp16 = gelu(mode = x_377_mode_0, x = linear_184_cast_fp16)[name = string("x_377_cast_fp16")]; + tensor var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(577192000)))]; + tensor var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590299264)))]; + tensor linear_185_cast_fp16 = linear(bias = var_3400_to_fp16, weight = var_3399_to_fp16, x = x_377_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor x_379_cast_fp16 = add(x = x_373_cast_fp16, y = linear_185_cast_fp16)[name = string("x_379_cast_fp16")]; + int32 var_3410 = const()[name = string("op_3410"), val = int32(-1)]; + tensor var_3426_axes_0 = const()[name = string("op_3426_axes_0"), val = tensor([-1])]; + tensor blocks_31_attn_ln_weight_to_fp16 = const()[name = string("blocks_31_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590301888)))]; + tensor blocks_31_attn_ln_bias_to_fp16 = const()[name = string("blocks_31_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590304512)))]; + fp16 var_3416_to_fp16 = const()[name = string("op_3416_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3426_cast_fp16 = layer_norm(axes = var_3426_axes_0, beta = blocks_31_attn_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_attn_ln_weight_to_fp16, x = x_379_cast_fp16)[name = string("op_3426_cast_fp16")]; + tensor var_3437_to_fp16 = const()[name = string("op_3437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(590307136)))]; + tensor var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593584000)))]; + tensor linear_186_cast_fp16 = linear(bias = var_3438_to_fp16, weight = var_3437_to_fp16, x = var_3426_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_3441_to_fp16 = const()[name = string("op_3441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(593586624)))]; + tensor linear_187_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3441_to_fp16, x = var_3426_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(596863488)))]; + tensor var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600140352)))]; + tensor linear_188_cast_fp16 = linear(bias = var_3446_to_fp16, weight = var_3445_to_fp16, x = var_3426_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor var_3454 = const()[name = string("op_3454"), val = tensor([1, 1500, 20, -1])]; + tensor var_3455_cast_fp16 = reshape(shape = var_3454, x = linear_186_cast_fp16)[name = string("op_3455_cast_fp16")]; + tensor const_286_to_fp16 = const()[name = string("const_286_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_3455_cast_fp16, y = const_286_to_fp16)[name = string("q_cast_fp16")]; + tensor var_3461 = const()[name = string("op_3461"), val = tensor([1, 1500, 20, -1])]; + tensor var_3462_cast_fp16 = reshape(shape = var_3461, x = linear_187_cast_fp16)[name = string("op_3462_cast_fp16")]; + tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_3462_cast_fp16, y = const_287_to_fp16)[name = string("k_cast_fp16")]; + tensor var_3468 = const()[name = string("op_3468"), val = tensor([1, 1500, 20, -1])]; + tensor var_3469_cast_fp16 = reshape(shape = var_3468, x = linear_188_cast_fp16)[name = string("op_3469_cast_fp16")]; + tensor var_3470 = const()[name = string("op_3470"), val = tensor([0, 2, -3, -1])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = k_cast_fp16)[name = string("transpose_98")]; + tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = q_cast_fp16)[name = string("transpose_99")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_94, y = transpose_95)[name = string("qk_cast_fp16")]; + tensor var_3474_cast_fp16 = softmax(axis = var_3410, x = qk_cast_fp16)[name = string("op_3474_cast_fp16")]; + bool var_3476_transpose_x_0 = const()[name = string("op_3476_transpose_x_0"), val = bool(false)]; + bool var_3476_transpose_y_0 = const()[name = string("op_3476_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_3470, x = var_3469_cast_fp16)[name = string("transpose_97")]; + tensor var_3476_cast_fp16 = matmul(transpose_x = var_3476_transpose_x_0, transpose_y = var_3476_transpose_y_0, x = var_3474_cast_fp16, y = v_cast_fp16)[name = string("op_3476_cast_fp16")]; + tensor var_3477 = const()[name = string("op_3477"), val = tensor([0, 2, 1, 3])]; + tensor concat_31 = const()[name = string("concat_31"), val = tensor([1, 1500, 1280])]; + tensor var_3478_cast_fp16 = transpose(perm = var_3477, x = var_3476_cast_fp16)[name = string("transpose_96")]; + tensor x_383_cast_fp16 = reshape(shape = concat_31, x = var_3478_cast_fp16)[name = string("x_383_cast_fp16")]; + tensor var_3482_to_fp16 = const()[name = string("op_3482_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(600142976)))]; + tensor var_3483_to_fp16 = const()[name = string("op_3483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603419840)))]; + tensor linear_189_cast_fp16 = linear(bias = var_3483_to_fp16, weight = var_3482_to_fp16, x = x_383_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor x_385_cast_fp16 = add(x = x_379_cast_fp16, y = linear_189_cast_fp16)[name = string("x_385_cast_fp16")]; + tensor var_3490_axes_0 = const()[name = string("op_3490_axes_0"), val = tensor([-1])]; + tensor blocks_31_mlp_ln_weight_to_fp16 = const()[name = string("blocks_31_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603422464)))]; + tensor blocks_31_mlp_ln_bias_to_fp16 = const()[name = string("blocks_31_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603425088)))]; + tensor var_3490_cast_fp16 = layer_norm(axes = var_3490_axes_0, beta = blocks_31_mlp_ln_bias_to_fp16, epsilon = var_3416_to_fp16, gamma = blocks_31_mlp_ln_weight_to_fp16, x = x_385_cast_fp16)[name = string("op_3490_cast_fp16")]; + tensor var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(603427712)))]; + tensor var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616534976)))]; + tensor linear_190_cast_fp16 = linear(bias = var_3500_to_fp16, weight = var_3499_to_fp16, x = var_3490_cast_fp16)[name = string("linear_190_cast_fp16")]; + string x_389_mode_0 = const()[name = string("x_389_mode_0"), val = string("EXACT")]; + tensor x_389_cast_fp16 = gelu(mode = x_389_mode_0, x = linear_190_cast_fp16)[name = string("x_389_cast_fp16")]; + tensor var_3505_to_fp16 = const()[name = string("op_3505_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(616545280)))]; + tensor var_3506_to_fp16 = const()[name = string("op_3506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629652544)))]; + tensor linear_191_cast_fp16 = linear(bias = var_3506_to_fp16, weight = var_3505_to_fp16, x = x_389_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_385_cast_fp16, y = linear_191_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_3519_axes_0 = const()[name = string("op_3519_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629655168)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/1-weight.bin"), offset = uint64(629657792)))]; + fp16 var_3510_to_fp16 = const()[name = string("op_3510_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_3519_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_3510_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_3519_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin b/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..d74dcc72eb085c49244201f90644c01049a0a54a --- /dev/null +++ b/large-v3/encoder.mlmodelc/model1/weights/1-weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f870130a04ffe31ab992466d0f4a0da493140364dae94a946e4fd5aaf80e8af +size 629660416 diff --git a/large-v3/model_dims.json b/large-v3/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..4e5155a7f1c64dbc222d61edfe1e871c529efe4f --- /dev/null +++ b/large-v3/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 128, + "n_audio_ctx": 1500, + "n_audio_state": 1280, + "n_audio_head": 20, + "n_audio_layer": 32, + "n_vocab": 51866, + "n_text_ctx": 448, + "n_text_state": 1280, + "n_text_head": 20, + "n_text_layer": 32 +} \ No newline at end of file diff --git a/medium/decoder_first.mlmodelc/analytics/coremldata.bin b/medium/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..346d4ba25ea5a3de75867619133a1dae47f5bd40 --- /dev/null +++ b/medium/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88974e233d54396d36bcc56cb3529205db7703529c6fd653b711e4a0d45ccea8 +size 243 diff --git a/medium/decoder_first.mlmodelc/coremldata.bin b/medium/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..825710543896f917d3afd2630cd980008fa14610 --- /dev/null +++ b/medium/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f60bbbb075152da3db06cf9e036d38ae791a5c37ccb1530a22501166053edf +size 453 diff --git a/medium/decoder_first.mlmodelc/metadata.json b/medium/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..69768ea5b31d5ef957c2d6853b3a4bc4f08ba7d5 --- /dev/null +++ b/medium/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 50, + "Shape" : 48, + "Ios18.linear" : 48, + "Identity" : 1, + "Ios18.gather" : 48, + "Ios18.concat" : 48, + "Ios18.sliceUpdate" : 50, + "Ios18.cast" : 96, + "Ios18.expandDims" : 48, + "Ios18.readState" : 50 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 448 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 448, 1024]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 448 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 448, 1024]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 1500, 1024]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 1500, 1024]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 1024", + "shapeRange" : "[[1, 1], [1, 1500], [1024, 1024]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 1024)", + "type" : "MultiArray", + "shape" : "[1, 1, 1024]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/medium/decoder_first.mlmodelc/model.mil b/medium/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..53c1f14bee99cd2905ac08f508a89178e0096a53 --- /dev/null +++ b/medium/decoder_first.mlmodelc/model.mil @@ -0,0 +1,1395 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 1024]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [1024, 1024]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_50_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_51_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_115_to_fp16 = const()[name = string("op_115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22020224)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24117440)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_115_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24119552)))]; + tensor var_120_to_fp16 = const()[name = string("op_120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26216768)))]; + tensor linear_1_cast_fp16 = linear(bias = var_120_to_fp16, weight = var_119_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_122_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_122_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_122_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_122_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_122_shape_cast_fp16_to_int16 = cast(dtype = var_122_shape_cast_fp16_to_int16_dtype_0, x = var_122_shape_cast_fp16)[name = string("cast_151")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_122_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_150")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_52 = read_state(input = k_cache2)[name = string("coreml_update_state_52")]; + tensor var_127_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_127_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_127_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_127_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_127_shape_cast_fp16_to_uint16 = cast(dtype = var_127_shape_cast_fp16_to_uint16_dtype_0, x = var_127_shape_cast_fp16)[name = string("cast_149")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_127_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_148")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_53 = read_state(input = v_cache2)[name = string("coreml_update_state_53")]; + tensor var_149_to_fp16 = const()[name = string("op_149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26218880)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_149_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_153_to_fp16 = const()[name = string("op_153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28316096)))]; + tensor var_154_to_fp16 = const()[name = string("op_154_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30413312)))]; + tensor linear_3_cast_fp16 = linear(bias = var_154_to_fp16, weight = var_153_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_156_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_156_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_156_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_156_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_156_shape_cast_fp16_to_uint16 = cast(dtype = var_156_shape_cast_fp16_to_uint16_dtype_0, x = var_156_shape_cast_fp16)[name = string("cast_147")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_156_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_146")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_52)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_54 = read_state(input = k_cache2)[name = string("coreml_update_state_54")]; + tensor var_161_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_161_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_161_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_161_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_161_shape_cast_fp16_to_uint16 = cast(dtype = var_161_shape_cast_fp16_to_uint16_dtype_0, x = var_161_shape_cast_fp16)[name = string("cast_145")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_161_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_144")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_53)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_55 = read_state(input = v_cache2)[name = string("coreml_update_state_55")]; + tensor var_183_to_fp16 = const()[name = string("op_183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30415424)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_183_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_187_to_fp16 = const()[name = string("op_187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32512640)))]; + tensor var_188_to_fp16 = const()[name = string("op_188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34609856)))]; + tensor linear_5_cast_fp16 = linear(bias = var_188_to_fp16, weight = var_187_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_190_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_190_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_190_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_190_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_190_shape_cast_fp16_to_uint16 = cast(dtype = var_190_shape_cast_fp16_to_uint16_dtype_0, x = var_190_shape_cast_fp16)[name = string("cast_143")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_190_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_142")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_54)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_56 = read_state(input = k_cache2)[name = string("coreml_update_state_56")]; + tensor var_195_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_195_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_195_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_195_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_195_shape_cast_fp16_to_uint16 = cast(dtype = var_195_shape_cast_fp16_to_uint16_dtype_0, x = var_195_shape_cast_fp16)[name = string("cast_141")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_195_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_140")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_55)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_57 = read_state(input = v_cache2)[name = string("coreml_update_state_57")]; + tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34611968)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_217_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_221_to_fp16 = const()[name = string("op_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36709184)))]; + tensor var_222_to_fp16 = const()[name = string("op_222_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38806400)))]; + tensor linear_7_cast_fp16 = linear(bias = var_222_to_fp16, weight = var_221_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_224_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_224_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_224_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_224_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_224_shape_cast_fp16_to_uint16 = cast(dtype = var_224_shape_cast_fp16_to_uint16_dtype_0, x = var_224_shape_cast_fp16)[name = string("cast_139")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_224_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_138")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_56)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_58 = read_state(input = k_cache2)[name = string("coreml_update_state_58")]; + tensor var_229_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_229_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_229_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_229_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_229_shape_cast_fp16_to_uint16 = cast(dtype = var_229_shape_cast_fp16_to_uint16_dtype_0, x = var_229_shape_cast_fp16)[name = string("cast_137")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_229_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_136")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_57)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_59 = read_state(input = v_cache2)[name = string("coreml_update_state_59")]; + tensor var_251_to_fp16 = const()[name = string("op_251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38808512)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_251_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")]; + tensor var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40905728)))]; + tensor var_256_to_fp16 = const()[name = string("op_256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43002944)))]; + tensor linear_9_cast_fp16 = linear(bias = var_256_to_fp16, weight = var_255_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")]; + tensor var_258_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_258_shape_cast_fp16")]; + int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)]; + int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)]; + bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)]; + string var_258_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_258_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)]; + tensor var_258_shape_cast_fp16_to_uint16 = cast(dtype = var_258_shape_cast_fp16_to_uint16_dtype_0, x = var_258_shape_cast_fp16)[name = string("cast_135")]; + uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_258_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")]; + string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor([0])]; + int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_134")]; + tensor expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([4, 0, 0, 0])]; + tensor concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor([0])]; + tensor concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor([0])]; + tensor concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor([0])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")]; + tensor k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_58)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = k_cache2)[name = string("coreml_update_state_60")]; + tensor var_263_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_263_shape_cast_fp16")]; + int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)]; + int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)]; + bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)]; + string var_263_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_263_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)]; + tensor var_263_shape_cast_fp16_to_uint16 = cast(dtype = var_263_shape_cast_fp16_to_uint16_dtype_0, x = var_263_shape_cast_fp16)[name = string("cast_133")]; + uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_263_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")]; + string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor([0])]; + int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_132")]; + tensor expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; + tensor concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor([0])]; + tensor concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor([0])]; + tensor concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor([0])]; + int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; + bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; + tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")]; + tensor v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_59)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = v_cache2)[name = string("coreml_update_state_61")]; + tensor var_285_to_fp16 = const()[name = string("op_285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43005056)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_285_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")]; + tensor var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45102272)))]; + tensor var_290_to_fp16 = const()[name = string("op_290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47199488)))]; + tensor linear_11_cast_fp16 = linear(bias = var_290_to_fp16, weight = var_289_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")]; + tensor var_292_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_292_shape_cast_fp16")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_292_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_292_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)]; + tensor var_292_shape_cast_fp16_to_uint16 = cast(dtype = var_292_shape_cast_fp16_to_uint16_dtype_0, x = var_292_shape_cast_fp16)[name = string("cast_131")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_292_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_130")]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([5, 0, 0, 0])]; + tensor concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor([0])]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")]; + tensor k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_60)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = k_cache2)[name = string("coreml_update_state_62")]; + tensor var_297_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_297_shape_cast_fp16")]; + int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)]; + int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)]; + bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)]; + string var_297_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_297_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)]; + tensor var_297_shape_cast_fp16_to_uint16 = cast(dtype = var_297_shape_cast_fp16_to_uint16_dtype_0, x = var_297_shape_cast_fp16)[name = string("cast_129")]; + uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_297_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")]; + string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor([0])]; + int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_128")]; + tensor expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([5, 0, 0, 0])]; + tensor concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor([0])]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")]; + tensor v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_61)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = v_cache2)[name = string("coreml_update_state_63")]; + tensor var_319_to_fp16 = const()[name = string("op_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47201600)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_319_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")]; + tensor var_323_to_fp16 = const()[name = string("op_323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49298816)))]; + tensor var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51396032)))]; + tensor linear_13_cast_fp16 = linear(bias = var_324_to_fp16, weight = var_323_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")]; + tensor var_326_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_326_shape_cast_fp16")]; + int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)]; + int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)]; + bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)]; + string var_326_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_326_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)]; + tensor var_326_shape_cast_fp16_to_uint16 = cast(dtype = var_326_shape_cast_fp16_to_uint16_dtype_0, x = var_326_shape_cast_fp16)[name = string("cast_127")]; + uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_326_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")]; + string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor([0])]; + int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_126")]; + tensor expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([6, 0, 0, 0])]; + tensor concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor([0])]; + tensor concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor([0])]; + tensor concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor([0])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")]; + tensor k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_62)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = k_cache2)[name = string("coreml_update_state_64")]; + tensor var_331_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_331_shape_cast_fp16")]; + int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)]; + int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)]; + bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)]; + string var_331_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_331_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)]; + tensor var_331_shape_cast_fp16_to_uint16 = cast(dtype = var_331_shape_cast_fp16_to_uint16_dtype_0, x = var_331_shape_cast_fp16)[name = string("cast_125")]; + uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_331_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")]; + string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor([0])]; + int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_124")]; + tensor expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([6, 0, 0, 0])]; + tensor concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor([0])]; + tensor concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor([0])]; + tensor concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor([0])]; + int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)]; + bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)]; + tensor concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")]; + tensor v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_63)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = v_cache2)[name = string("coreml_update_state_65")]; + tensor var_353_to_fp16 = const()[name = string("op_353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51398144)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_353_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")]; + tensor var_357_to_fp16 = const()[name = string("op_357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53495360)))]; + tensor var_358_to_fp16 = const()[name = string("op_358_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55592576)))]; + tensor linear_15_cast_fp16 = linear(bias = var_358_to_fp16, weight = var_357_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")]; + tensor var_360_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_360_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_360_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_360_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_360_shape_cast_fp16_to_uint16 = cast(dtype = var_360_shape_cast_fp16_to_uint16_dtype_0, x = var_360_shape_cast_fp16)[name = string("cast_123")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_360_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_122")]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([7, 0, 0, 0])]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([0])]; + tensor concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor([0])]; + tensor concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor([0])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")]; + tensor k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_64)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = k_cache2)[name = string("coreml_update_state_66")]; + tensor var_365_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_365_shape_cast_fp16")]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_365_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_365_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)]; + tensor var_365_shape_cast_fp16_to_uint16 = cast(dtype = var_365_shape_cast_fp16_to_uint16_dtype_0, x = var_365_shape_cast_fp16)[name = string("cast_121")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_365_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor([0])]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_120")]; + tensor expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([7, 0, 0, 0])]; + tensor concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor([0])]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")]; + tensor v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_65)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = v_cache2)[name = string("coreml_update_state_67")]; + tensor var_387_to_fp16 = const()[name = string("op_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55594688)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_387_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")]; + tensor var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57691904)))]; + tensor var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59789120)))]; + tensor linear_17_cast_fp16 = linear(bias = var_392_to_fp16, weight = var_391_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")]; + tensor var_394_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_394_shape_cast_fp16")]; + int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)]; + int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)]; + bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)]; + string var_394_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_394_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)]; + tensor var_394_shape_cast_fp16_to_uint16 = cast(dtype = var_394_shape_cast_fp16_to_uint16_dtype_0, x = var_394_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_394_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")]; + string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor([0])]; + int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_118")]; + tensor expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([8, 0, 0, 0])]; + tensor concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor([0])]; + tensor concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor([0])]; + tensor concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor([0])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")]; + tensor k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_66)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache2)[name = string("coreml_update_state_68")]; + tensor var_399_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_399_shape_cast_fp16")]; + int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)]; + int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)]; + bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)]; + string var_399_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_399_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)]; + tensor var_399_shape_cast_fp16_to_uint16 = cast(dtype = var_399_shape_cast_fp16_to_uint16_dtype_0, x = var_399_shape_cast_fp16)[name = string("cast_117")]; + uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_399_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")]; + string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor([0])]; + int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_116")]; + tensor expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([8, 0, 0, 0])]; + tensor concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor([0])]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")]; + tensor v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_67)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache2)[name = string("coreml_update_state_69")]; + tensor var_421_to_fp16 = const()[name = string("op_421_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59791232)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_421_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")]; + tensor var_425_to_fp16 = const()[name = string("op_425_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61888448)))]; + tensor var_426_to_fp16 = const()[name = string("op_426_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63985664)))]; + tensor linear_19_cast_fp16 = linear(bias = var_426_to_fp16, weight = var_425_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")]; + tensor var_428_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_428_shape_cast_fp16")]; + int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)]; + int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)]; + bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)]; + string var_428_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_428_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)]; + tensor var_428_shape_cast_fp16_to_uint16 = cast(dtype = var_428_shape_cast_fp16_to_uint16_dtype_0, x = var_428_shape_cast_fp16)[name = string("cast_115")]; + uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_428_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")]; + string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_114")]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([9, 0, 0, 0])]; + tensor concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor([0])]; + tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; + tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")]; + tensor k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_68)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache2)[name = string("coreml_update_state_70")]; + tensor var_433_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_433_shape_cast_fp16")]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_433_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_433_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)]; + tensor var_433_shape_cast_fp16_to_uint16 = cast(dtype = var_433_shape_cast_fp16_to_uint16_dtype_0, x = var_433_shape_cast_fp16)[name = string("cast_113")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_433_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor([0])]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_112")]; + tensor expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([9, 0, 0, 0])]; + tensor concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor([0])]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")]; + tensor v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_69)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache2)[name = string("coreml_update_state_71")]; + tensor var_455_to_fp16 = const()[name = string("op_455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63987776)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_455_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")]; + tensor var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66084992)))]; + tensor var_460_to_fp16 = const()[name = string("op_460_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68182208)))]; + tensor linear_21_cast_fp16 = linear(bias = var_460_to_fp16, weight = var_459_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")]; + tensor var_462_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_462_shape_cast_fp16")]; + int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)]; + int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)]; + bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)]; + string var_462_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_462_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)]; + tensor var_462_shape_cast_fp16_to_uint16 = cast(dtype = var_462_shape_cast_fp16_to_uint16_dtype_0, x = var_462_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_462_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")]; + string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor([0])]; + int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_110")]; + tensor expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([10, 0, 0, 0])]; + tensor concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor([0])]; + tensor concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor([0])]; + tensor concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor([0])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")]; + tensor k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_70)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache2)[name = string("coreml_update_state_72")]; + tensor var_467_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_467_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_467_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_467_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)]; + tensor var_467_shape_cast_fp16_to_uint16 = cast(dtype = var_467_shape_cast_fp16_to_uint16_dtype_0, x = var_467_shape_cast_fp16)[name = string("cast_109")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_467_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor([0])]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_108")]; + tensor expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")]; + tensor concat_68 = const()[name = string("concat_68"), val = tensor([10, 0, 0, 0])]; + tensor concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor([0])]; + tensor concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor([0])]; + tensor concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor([0])]; + int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)]; + bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)]; + tensor concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")]; + tensor v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_71)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache2)[name = string("coreml_update_state_73")]; + tensor var_489_to_fp16 = const()[name = string("op_489_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68184320)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_489_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")]; + tensor var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70281536)))]; + tensor var_494_to_fp16 = const()[name = string("op_494_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72378752)))]; + tensor linear_23_cast_fp16 = linear(bias = var_494_to_fp16, weight = var_493_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")]; + tensor var_496_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_496_shape_cast_fp16")]; + int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)]; + int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)]; + bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)]; + string var_496_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_496_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)]; + tensor var_496_shape_cast_fp16_to_uint16 = cast(dtype = var_496_shape_cast_fp16_to_uint16_dtype_0, x = var_496_shape_cast_fp16)[name = string("cast_107")]; + uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_496_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")]; + string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_106")]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([11, 0, 0, 0])]; + tensor concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor([0])]; + tensor concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor([0])]; + tensor concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor([0])]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")]; + tensor k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_72)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache2)[name = string("coreml_update_state_74")]; + tensor var_501_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_501_shape_cast_fp16")]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_501_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_501_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_501_shape_cast_fp16_to_uint16 = cast(dtype = var_501_shape_cast_fp16_to_uint16_dtype_0, x = var_501_shape_cast_fp16)[name = string("cast_105")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_501_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor([0])]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_104")]; + tensor expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")]; + tensor concat_74 = const()[name = string("concat_74"), val = tensor([11, 0, 0, 0])]; + tensor concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor([0])]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")]; + tensor v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_73)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache2)[name = string("coreml_update_state_75")]; + tensor var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72380864)))]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_523_to_fp16, x = audio_data)[name = string("linear_24_cast_fp16")]; + tensor var_527_to_fp16 = const()[name = string("op_527_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74478080)))]; + tensor var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76575296)))]; + tensor linear_25_cast_fp16 = linear(bias = var_528_to_fp16, weight = var_527_to_fp16, x = audio_data)[name = string("linear_25_cast_fp16")]; + tensor var_530_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_530_shape_cast_fp16")]; + int32 gather_24_axis_0 = const()[name = string("gather_24_axis_0"), val = int32(0)]; + int32 gather_24_batch_dims_0 = const()[name = string("gather_24_batch_dims_0"), val = int32(0)]; + bool gather_24_validate_indices_0 = const()[name = string("gather_24_validate_indices_0"), val = bool(false)]; + string var_530_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_530_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_24_to_uint16 = const()[name = string("select_24_to_uint16"), val = uint16(1)]; + tensor var_530_shape_cast_fp16_to_uint16 = cast(dtype = var_530_shape_cast_fp16_to_uint16_dtype_0, x = var_530_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_24_cast_uint16 = gather(axis = gather_24_axis_0, batch_dims = gather_24_batch_dims_0, indices = select_24_to_uint16, validate_indices = gather_24_validate_indices_0, x = var_530_shape_cast_fp16_to_uint16)[name = string("gather_24_cast_uint16")]; + string gather_24_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_24_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_107_axes_0 = const()[name = string("expand_dims_107_axes_0"), val = tensor([0])]; + int32 gather_24_cast_uint16_to_int32 = cast(dtype = gather_24_cast_uint16_to_int32_dtype_0, x = gather_24_cast_uint16)[name = string("cast_102")]; + tensor expand_dims_107 = expand_dims(axes = expand_dims_107_axes_0, x = gather_24_cast_uint16_to_int32)[name = string("expand_dims_107")]; + tensor concat_77 = const()[name = string("concat_77"), val = tensor([12, 0, 0, 0])]; + tensor concat_78_values0_0 = const()[name = string("concat_78_values0_0"), val = tensor([0])]; + tensor concat_78_values1_0 = const()[name = string("concat_78_values1_0"), val = tensor([0])]; + tensor concat_78_values3_0 = const()[name = string("concat_78_values3_0"), val = tensor([0])]; + int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)]; + bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)]; + tensor concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (concat_78_values0_0, concat_78_values1_0, expand_dims_107, concat_78_values3_0))[name = string("concat_78")]; + tensor k_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_77, begin_mask = k_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_78, end_mask = k_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_13_stride_0, update = linear_24_cast_fp16, x = coreml_update_state_74)[name = string("k_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_13_cast_fp16, input = k_cache2)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache2)[name = string("coreml_update_state_76")]; + tensor var_535_shape_cast_fp16 = shape(x = linear_25_cast_fp16)[name = string("op_535_shape_cast_fp16")]; + int32 gather_25_axis_0 = const()[name = string("gather_25_axis_0"), val = int32(0)]; + int32 gather_25_batch_dims_0 = const()[name = string("gather_25_batch_dims_0"), val = int32(0)]; + bool gather_25_validate_indices_0 = const()[name = string("gather_25_validate_indices_0"), val = bool(false)]; + string var_535_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_535_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_25_to_uint16 = const()[name = string("select_25_to_uint16"), val = uint16(1)]; + tensor var_535_shape_cast_fp16_to_uint16 = cast(dtype = var_535_shape_cast_fp16_to_uint16_dtype_0, x = var_535_shape_cast_fp16)[name = string("cast_101")]; + uint16 gather_25_cast_uint16 = gather(axis = gather_25_axis_0, batch_dims = gather_25_batch_dims_0, indices = select_25_to_uint16, validate_indices = gather_25_validate_indices_0, x = var_535_shape_cast_fp16_to_uint16)[name = string("gather_25_cast_uint16")]; + string gather_25_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_25_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_111_axes_0 = const()[name = string("expand_dims_111_axes_0"), val = tensor([0])]; + int32 gather_25_cast_uint16_to_int32 = cast(dtype = gather_25_cast_uint16_to_int32_dtype_0, x = gather_25_cast_uint16)[name = string("cast_100")]; + tensor expand_dims_111 = expand_dims(axes = expand_dims_111_axes_0, x = gather_25_cast_uint16_to_int32)[name = string("expand_dims_111")]; + tensor concat_80 = const()[name = string("concat_80"), val = tensor([12, 0, 0, 0])]; + tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([0])]; + tensor concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = tensor([0])]; + tensor concat_81_values3_0 = const()[name = string("concat_81_values3_0"), val = tensor([0])]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, concat_81_values1_0, expand_dims_111, concat_81_values3_0))[name = string("concat_81")]; + tensor v_cache2_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_80, begin_mask = v_cache2_internal_tensor_assign_13_begin_mask_0, end = concat_81, end_mask = v_cache2_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_13_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_75)[name = string("v_cache2_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_13_cast_fp16, input = v_cache2)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache2)[name = string("coreml_update_state_77")]; + tensor var_557_to_fp16 = const()[name = string("op_557_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76577408)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_557_to_fp16, x = audio_data)[name = string("linear_26_cast_fp16")]; + tensor var_561_to_fp16 = const()[name = string("op_561_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78674624)))]; + tensor var_562_to_fp16 = const()[name = string("op_562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80771840)))]; + tensor linear_27_cast_fp16 = linear(bias = var_562_to_fp16, weight = var_561_to_fp16, x = audio_data)[name = string("linear_27_cast_fp16")]; + tensor var_564_shape_cast_fp16 = shape(x = linear_26_cast_fp16)[name = string("op_564_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_564_shape_cast_fp16_to_uint16 = cast(dtype = var_564_shape_cast_fp16_to_uint16_dtype_0, x = var_564_shape_cast_fp16)[name = string("cast_99")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_564_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_98")]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = gather_26_cast_uint16_to_int32)[name = string("expand_dims_115")]; + tensor concat_83 = const()[name = string("concat_83"), val = tensor([13, 0, 0, 0])]; + tensor concat_84_values0_0 = const()[name = string("concat_84_values0_0"), val = tensor([0])]; + tensor concat_84_values1_0 = const()[name = string("concat_84_values1_0"), val = tensor([0])]; + tensor concat_84_values3_0 = const()[name = string("concat_84_values3_0"), val = tensor([0])]; + int32 concat_84_axis_0 = const()[name = string("concat_84_axis_0"), val = int32(0)]; + bool concat_84_interleave_0 = const()[name = string("concat_84_interleave_0"), val = bool(false)]; + tensor concat_84 = concat(axis = concat_84_axis_0, interleave = concat_84_interleave_0, values = (concat_84_values0_0, concat_84_values1_0, expand_dims_115, concat_84_values3_0))[name = string("concat_84")]; + tensor k_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_83, begin_mask = k_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_84, end_mask = k_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_14_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_76)[name = string("k_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_14_cast_fp16, input = k_cache2)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache2)[name = string("coreml_update_state_78")]; + tensor var_569_shape_cast_fp16 = shape(x = linear_27_cast_fp16)[name = string("op_569_shape_cast_fp16")]; + int32 gather_27_axis_0 = const()[name = string("gather_27_axis_0"), val = int32(0)]; + int32 gather_27_batch_dims_0 = const()[name = string("gather_27_batch_dims_0"), val = int32(0)]; + bool gather_27_validate_indices_0 = const()[name = string("gather_27_validate_indices_0"), val = bool(false)]; + string var_569_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_569_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_27_to_uint16 = const()[name = string("select_27_to_uint16"), val = uint16(1)]; + tensor var_569_shape_cast_fp16_to_uint16 = cast(dtype = var_569_shape_cast_fp16_to_uint16_dtype_0, x = var_569_shape_cast_fp16)[name = string("cast_97")]; + uint16 gather_27_cast_uint16 = gather(axis = gather_27_axis_0, batch_dims = gather_27_batch_dims_0, indices = select_27_to_uint16, validate_indices = gather_27_validate_indices_0, x = var_569_shape_cast_fp16_to_uint16)[name = string("gather_27_cast_uint16")]; + string gather_27_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_27_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_119_axes_0 = const()[name = string("expand_dims_119_axes_0"), val = tensor([0])]; + int32 gather_27_cast_uint16_to_int32 = cast(dtype = gather_27_cast_uint16_to_int32_dtype_0, x = gather_27_cast_uint16)[name = string("cast_96")]; + tensor expand_dims_119 = expand_dims(axes = expand_dims_119_axes_0, x = gather_27_cast_uint16_to_int32)[name = string("expand_dims_119")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([13, 0, 0, 0])]; + tensor concat_87_values0_0 = const()[name = string("concat_87_values0_0"), val = tensor([0])]; + tensor concat_87_values1_0 = const()[name = string("concat_87_values1_0"), val = tensor([0])]; + tensor concat_87_values3_0 = const()[name = string("concat_87_values3_0"), val = tensor([0])]; + int32 concat_87_axis_0 = const()[name = string("concat_87_axis_0"), val = int32(0)]; + bool concat_87_interleave_0 = const()[name = string("concat_87_interleave_0"), val = bool(false)]; + tensor concat_87 = concat(axis = concat_87_axis_0, interleave = concat_87_interleave_0, values = (concat_87_values0_0, concat_87_values1_0, expand_dims_119, concat_87_values3_0))[name = string("concat_87")]; + tensor v_cache2_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_cache2_internal_tensor_assign_14_begin_mask_0, end = concat_87, end_mask = v_cache2_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_14_stride_0, update = linear_27_cast_fp16, x = coreml_update_state_77)[name = string("v_cache2_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_14_cast_fp16, input = v_cache2)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache2)[name = string("coreml_update_state_79")]; + tensor var_591_to_fp16 = const()[name = string("op_591_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80773952)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_591_to_fp16, x = audio_data)[name = string("linear_28_cast_fp16")]; + tensor var_595_to_fp16 = const()[name = string("op_595_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82871168)))]; + tensor var_596_to_fp16 = const()[name = string("op_596_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84968384)))]; + tensor linear_29_cast_fp16 = linear(bias = var_596_to_fp16, weight = var_595_to_fp16, x = audio_data)[name = string("linear_29_cast_fp16")]; + tensor var_598_shape_cast_fp16 = shape(x = linear_28_cast_fp16)[name = string("op_598_shape_cast_fp16")]; + int32 gather_28_axis_0 = const()[name = string("gather_28_axis_0"), val = int32(0)]; + int32 gather_28_batch_dims_0 = const()[name = string("gather_28_batch_dims_0"), val = int32(0)]; + bool gather_28_validate_indices_0 = const()[name = string("gather_28_validate_indices_0"), val = bool(false)]; + string var_598_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_598_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_28_to_uint16 = const()[name = string("select_28_to_uint16"), val = uint16(1)]; + tensor var_598_shape_cast_fp16_to_uint16 = cast(dtype = var_598_shape_cast_fp16_to_uint16_dtype_0, x = var_598_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_28_cast_uint16 = gather(axis = gather_28_axis_0, batch_dims = gather_28_batch_dims_0, indices = select_28_to_uint16, validate_indices = gather_28_validate_indices_0, x = var_598_shape_cast_fp16_to_uint16)[name = string("gather_28_cast_uint16")]; + string gather_28_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_28_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_123_axes_0 = const()[name = string("expand_dims_123_axes_0"), val = tensor([0])]; + int32 gather_28_cast_uint16_to_int32 = cast(dtype = gather_28_cast_uint16_to_int32_dtype_0, x = gather_28_cast_uint16)[name = string("cast_94")]; + tensor expand_dims_123 = expand_dims(axes = expand_dims_123_axes_0, x = gather_28_cast_uint16_to_int32)[name = string("expand_dims_123")]; + tensor concat_89 = const()[name = string("concat_89"), val = tensor([14, 0, 0, 0])]; + tensor concat_90_values0_0 = const()[name = string("concat_90_values0_0"), val = tensor([0])]; + tensor concat_90_values1_0 = const()[name = string("concat_90_values1_0"), val = tensor([0])]; + tensor concat_90_values3_0 = const()[name = string("concat_90_values3_0"), val = tensor([0])]; + int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; + bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; + tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (concat_90_values0_0, concat_90_values1_0, expand_dims_123, concat_90_values3_0))[name = string("concat_90")]; + tensor k_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_89, begin_mask = k_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_90, end_mask = k_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_15_stride_0, update = linear_28_cast_fp16, x = coreml_update_state_78)[name = string("k_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_15_cast_fp16, input = k_cache2)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache2)[name = string("coreml_update_state_80")]; + tensor var_603_shape_cast_fp16 = shape(x = linear_29_cast_fp16)[name = string("op_603_shape_cast_fp16")]; + int32 gather_29_axis_0 = const()[name = string("gather_29_axis_0"), val = int32(0)]; + int32 gather_29_batch_dims_0 = const()[name = string("gather_29_batch_dims_0"), val = int32(0)]; + bool gather_29_validate_indices_0 = const()[name = string("gather_29_validate_indices_0"), val = bool(false)]; + string var_603_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_603_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_29_to_uint16 = const()[name = string("select_29_to_uint16"), val = uint16(1)]; + tensor var_603_shape_cast_fp16_to_uint16 = cast(dtype = var_603_shape_cast_fp16_to_uint16_dtype_0, x = var_603_shape_cast_fp16)[name = string("cast_93")]; + uint16 gather_29_cast_uint16 = gather(axis = gather_29_axis_0, batch_dims = gather_29_batch_dims_0, indices = select_29_to_uint16, validate_indices = gather_29_validate_indices_0, x = var_603_shape_cast_fp16_to_uint16)[name = string("gather_29_cast_uint16")]; + string gather_29_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_29_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_127_axes_0 = const()[name = string("expand_dims_127_axes_0"), val = tensor([0])]; + int32 gather_29_cast_uint16_to_int32 = cast(dtype = gather_29_cast_uint16_to_int32_dtype_0, x = gather_29_cast_uint16)[name = string("cast_92")]; + tensor expand_dims_127 = expand_dims(axes = expand_dims_127_axes_0, x = gather_29_cast_uint16_to_int32)[name = string("expand_dims_127")]; + tensor concat_92 = const()[name = string("concat_92"), val = tensor([14, 0, 0, 0])]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_127, concat_93_values3_0))[name = string("concat_93")]; + tensor v_cache2_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache2_internal_tensor_assign_15_begin_mask_0, end = concat_93, end_mask = v_cache2_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_15_stride_0, update = linear_29_cast_fp16, x = coreml_update_state_79)[name = string("v_cache2_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_15_cast_fp16, input = v_cache2)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache2)[name = string("coreml_update_state_81")]; + tensor var_625_to_fp16 = const()[name = string("op_625_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84970496)))]; + tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_625_to_fp16, x = audio_data)[name = string("linear_30_cast_fp16")]; + tensor var_629_to_fp16 = const()[name = string("op_629_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87067712)))]; + tensor var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89164928)))]; + tensor linear_31_cast_fp16 = linear(bias = var_630_to_fp16, weight = var_629_to_fp16, x = audio_data)[name = string("linear_31_cast_fp16")]; + tensor var_632_shape_cast_fp16 = shape(x = linear_30_cast_fp16)[name = string("op_632_shape_cast_fp16")]; + int32 gather_30_axis_0 = const()[name = string("gather_30_axis_0"), val = int32(0)]; + int32 gather_30_batch_dims_0 = const()[name = string("gather_30_batch_dims_0"), val = int32(0)]; + bool gather_30_validate_indices_0 = const()[name = string("gather_30_validate_indices_0"), val = bool(false)]; + string var_632_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_632_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_30_to_uint16 = const()[name = string("select_30_to_uint16"), val = uint16(1)]; + tensor var_632_shape_cast_fp16_to_uint16 = cast(dtype = var_632_shape_cast_fp16_to_uint16_dtype_0, x = var_632_shape_cast_fp16)[name = string("cast_91")]; + uint16 gather_30_cast_uint16 = gather(axis = gather_30_axis_0, batch_dims = gather_30_batch_dims_0, indices = select_30_to_uint16, validate_indices = gather_30_validate_indices_0, x = var_632_shape_cast_fp16_to_uint16)[name = string("gather_30_cast_uint16")]; + string gather_30_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_30_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + int32 gather_30_cast_uint16_to_int32 = cast(dtype = gather_30_cast_uint16_to_int32_dtype_0, x = gather_30_cast_uint16)[name = string("cast_90")]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = gather_30_cast_uint16_to_int32)[name = string("expand_dims_131")]; + tensor concat_95 = const()[name = string("concat_95"), val = tensor([15, 0, 0, 0])]; + tensor concat_96_values0_0 = const()[name = string("concat_96_values0_0"), val = tensor([0])]; + tensor concat_96_values1_0 = const()[name = string("concat_96_values1_0"), val = tensor([0])]; + tensor concat_96_values3_0 = const()[name = string("concat_96_values3_0"), val = tensor([0])]; + int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)]; + bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)]; + tensor concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (concat_96_values0_0, concat_96_values1_0, expand_dims_131, concat_96_values3_0))[name = string("concat_96")]; + tensor k_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_95, begin_mask = k_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_96, end_mask = k_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_16_stride_0, update = linear_30_cast_fp16, x = coreml_update_state_80)[name = string("k_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_16_cast_fp16, input = k_cache2)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache2)[name = string("coreml_update_state_82")]; + tensor var_637_shape_cast_fp16 = shape(x = linear_31_cast_fp16)[name = string("op_637_shape_cast_fp16")]; + int32 gather_31_axis_0 = const()[name = string("gather_31_axis_0"), val = int32(0)]; + int32 gather_31_batch_dims_0 = const()[name = string("gather_31_batch_dims_0"), val = int32(0)]; + bool gather_31_validate_indices_0 = const()[name = string("gather_31_validate_indices_0"), val = bool(false)]; + string var_637_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_637_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_31_to_uint16 = const()[name = string("select_31_to_uint16"), val = uint16(1)]; + tensor var_637_shape_cast_fp16_to_uint16 = cast(dtype = var_637_shape_cast_fp16_to_uint16_dtype_0, x = var_637_shape_cast_fp16)[name = string("cast_89")]; + uint16 gather_31_cast_uint16 = gather(axis = gather_31_axis_0, batch_dims = gather_31_batch_dims_0, indices = select_31_to_uint16, validate_indices = gather_31_validate_indices_0, x = var_637_shape_cast_fp16_to_uint16)[name = string("gather_31_cast_uint16")]; + string gather_31_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_31_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_135_axes_0 = const()[name = string("expand_dims_135_axes_0"), val = tensor([0])]; + int32 gather_31_cast_uint16_to_int32 = cast(dtype = gather_31_cast_uint16_to_int32_dtype_0, x = gather_31_cast_uint16)[name = string("cast_88")]; + tensor expand_dims_135 = expand_dims(axes = expand_dims_135_axes_0, x = gather_31_cast_uint16_to_int32)[name = string("expand_dims_135")]; + tensor concat_98 = const()[name = string("concat_98"), val = tensor([15, 0, 0, 0])]; + tensor concat_99_values0_0 = const()[name = string("concat_99_values0_0"), val = tensor([0])]; + tensor concat_99_values1_0 = const()[name = string("concat_99_values1_0"), val = tensor([0])]; + tensor concat_99_values3_0 = const()[name = string("concat_99_values3_0"), val = tensor([0])]; + int32 concat_99_axis_0 = const()[name = string("concat_99_axis_0"), val = int32(0)]; + bool concat_99_interleave_0 = const()[name = string("concat_99_interleave_0"), val = bool(false)]; + tensor concat_99 = concat(axis = concat_99_axis_0, interleave = concat_99_interleave_0, values = (concat_99_values0_0, concat_99_values1_0, expand_dims_135, concat_99_values3_0))[name = string("concat_99")]; + tensor v_cache2_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_98, begin_mask = v_cache2_internal_tensor_assign_16_begin_mask_0, end = concat_99, end_mask = v_cache2_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_16_stride_0, update = linear_31_cast_fp16, x = coreml_update_state_81)[name = string("v_cache2_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_16_cast_fp16, input = v_cache2)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache2)[name = string("coreml_update_state_83")]; + tensor var_659_to_fp16 = const()[name = string("op_659_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89167040)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_659_to_fp16, x = audio_data)[name = string("linear_32_cast_fp16")]; + tensor var_663_to_fp16 = const()[name = string("op_663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91264256)))]; + tensor var_664_to_fp16 = const()[name = string("op_664_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93361472)))]; + tensor linear_33_cast_fp16 = linear(bias = var_664_to_fp16, weight = var_663_to_fp16, x = audio_data)[name = string("linear_33_cast_fp16")]; + tensor var_666_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_666_shape_cast_fp16")]; + int32 gather_32_axis_0 = const()[name = string("gather_32_axis_0"), val = int32(0)]; + int32 gather_32_batch_dims_0 = const()[name = string("gather_32_batch_dims_0"), val = int32(0)]; + bool gather_32_validate_indices_0 = const()[name = string("gather_32_validate_indices_0"), val = bool(false)]; + string var_666_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_666_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_32_to_uint16 = const()[name = string("select_32_to_uint16"), val = uint16(1)]; + tensor var_666_shape_cast_fp16_to_uint16 = cast(dtype = var_666_shape_cast_fp16_to_uint16_dtype_0, x = var_666_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_32_cast_uint16 = gather(axis = gather_32_axis_0, batch_dims = gather_32_batch_dims_0, indices = select_32_to_uint16, validate_indices = gather_32_validate_indices_0, x = var_666_shape_cast_fp16_to_uint16)[name = string("gather_32_cast_uint16")]; + string gather_32_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_32_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_139_axes_0 = const()[name = string("expand_dims_139_axes_0"), val = tensor([0])]; + int32 gather_32_cast_uint16_to_int32 = cast(dtype = gather_32_cast_uint16_to_int32_dtype_0, x = gather_32_cast_uint16)[name = string("cast_86")]; + tensor expand_dims_139 = expand_dims(axes = expand_dims_139_axes_0, x = gather_32_cast_uint16_to_int32)[name = string("expand_dims_139")]; + tensor concat_101 = const()[name = string("concat_101"), val = tensor([16, 0, 0, 0])]; + tensor concat_102_values0_0 = const()[name = string("concat_102_values0_0"), val = tensor([0])]; + tensor concat_102_values1_0 = const()[name = string("concat_102_values1_0"), val = tensor([0])]; + tensor concat_102_values3_0 = const()[name = string("concat_102_values3_0"), val = tensor([0])]; + int32 concat_102_axis_0 = const()[name = string("concat_102_axis_0"), val = int32(0)]; + bool concat_102_interleave_0 = const()[name = string("concat_102_interleave_0"), val = bool(false)]; + tensor concat_102 = concat(axis = concat_102_axis_0, interleave = concat_102_interleave_0, values = (concat_102_values0_0, concat_102_values1_0, expand_dims_139, concat_102_values3_0))[name = string("concat_102")]; + tensor k_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_101, begin_mask = k_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_102, end_mask = k_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_17_stride_0, update = linear_32_cast_fp16, x = coreml_update_state_82)[name = string("k_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_17_cast_fp16, input = k_cache2)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache2)[name = string("coreml_update_state_84")]; + tensor var_671_shape_cast_fp16 = shape(x = linear_33_cast_fp16)[name = string("op_671_shape_cast_fp16")]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_671_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_671_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(1)]; + tensor var_671_shape_cast_fp16_to_uint16 = cast(dtype = var_671_shape_cast_fp16_to_uint16_dtype_0, x = var_671_shape_cast_fp16)[name = string("cast_85")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_671_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_143_axes_0 = const()[name = string("expand_dims_143_axes_0"), val = tensor([0])]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_84")]; + tensor expand_dims_143 = expand_dims(axes = expand_dims_143_axes_0, x = gather_33_cast_uint16_to_int32)[name = string("expand_dims_143")]; + tensor concat_104 = const()[name = string("concat_104"), val = tensor([16, 0, 0, 0])]; + tensor concat_105_values0_0 = const()[name = string("concat_105_values0_0"), val = tensor([0])]; + tensor concat_105_values1_0 = const()[name = string("concat_105_values1_0"), val = tensor([0])]; + tensor concat_105_values3_0 = const()[name = string("concat_105_values3_0"), val = tensor([0])]; + int32 concat_105_axis_0 = const()[name = string("concat_105_axis_0"), val = int32(0)]; + bool concat_105_interleave_0 = const()[name = string("concat_105_interleave_0"), val = bool(false)]; + tensor concat_105 = concat(axis = concat_105_axis_0, interleave = concat_105_interleave_0, values = (concat_105_values0_0, concat_105_values1_0, expand_dims_143, concat_105_values3_0))[name = string("concat_105")]; + tensor v_cache2_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_104, begin_mask = v_cache2_internal_tensor_assign_17_begin_mask_0, end = concat_105, end_mask = v_cache2_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_17_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_83)[name = string("v_cache2_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_17_cast_fp16, input = v_cache2)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache2)[name = string("coreml_update_state_85")]; + tensor var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93363584)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_693_to_fp16, x = audio_data)[name = string("linear_34_cast_fp16")]; + tensor var_697_to_fp16 = const()[name = string("op_697_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95460800)))]; + tensor var_698_to_fp16 = const()[name = string("op_698_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97558016)))]; + tensor linear_35_cast_fp16 = linear(bias = var_698_to_fp16, weight = var_697_to_fp16, x = audio_data)[name = string("linear_35_cast_fp16")]; + tensor var_700_shape_cast_fp16 = shape(x = linear_34_cast_fp16)[name = string("op_700_shape_cast_fp16")]; + int32 gather_34_axis_0 = const()[name = string("gather_34_axis_0"), val = int32(0)]; + int32 gather_34_batch_dims_0 = const()[name = string("gather_34_batch_dims_0"), val = int32(0)]; + bool gather_34_validate_indices_0 = const()[name = string("gather_34_validate_indices_0"), val = bool(false)]; + string var_700_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_700_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_34_to_uint16 = const()[name = string("select_34_to_uint16"), val = uint16(1)]; + tensor var_700_shape_cast_fp16_to_uint16 = cast(dtype = var_700_shape_cast_fp16_to_uint16_dtype_0, x = var_700_shape_cast_fp16)[name = string("cast_83")]; + uint16 gather_34_cast_uint16 = gather(axis = gather_34_axis_0, batch_dims = gather_34_batch_dims_0, indices = select_34_to_uint16, validate_indices = gather_34_validate_indices_0, x = var_700_shape_cast_fp16_to_uint16)[name = string("gather_34_cast_uint16")]; + string gather_34_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_34_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + int32 gather_34_cast_uint16_to_int32 = cast(dtype = gather_34_cast_uint16_to_int32_dtype_0, x = gather_34_cast_uint16)[name = string("cast_82")]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = gather_34_cast_uint16_to_int32)[name = string("expand_dims_147")]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([17, 0, 0, 0])]; + tensor concat_108_values0_0 = const()[name = string("concat_108_values0_0"), val = tensor([0])]; + tensor concat_108_values1_0 = const()[name = string("concat_108_values1_0"), val = tensor([0])]; + tensor concat_108_values3_0 = const()[name = string("concat_108_values3_0"), val = tensor([0])]; + int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; + bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (concat_108_values0_0, concat_108_values1_0, expand_dims_147, concat_108_values3_0))[name = string("concat_108")]; + tensor k_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_107, begin_mask = k_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_108, end_mask = k_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_18_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_84)[name = string("k_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_18_cast_fp16, input = k_cache2)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache2)[name = string("coreml_update_state_86")]; + tensor var_705_shape_cast_fp16 = shape(x = linear_35_cast_fp16)[name = string("op_705_shape_cast_fp16")]; + int32 gather_35_axis_0 = const()[name = string("gather_35_axis_0"), val = int32(0)]; + int32 gather_35_batch_dims_0 = const()[name = string("gather_35_batch_dims_0"), val = int32(0)]; + bool gather_35_validate_indices_0 = const()[name = string("gather_35_validate_indices_0"), val = bool(false)]; + string var_705_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_705_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_35_to_uint16 = const()[name = string("select_35_to_uint16"), val = uint16(1)]; + tensor var_705_shape_cast_fp16_to_uint16 = cast(dtype = var_705_shape_cast_fp16_to_uint16_dtype_0, x = var_705_shape_cast_fp16)[name = string("cast_81")]; + uint16 gather_35_cast_uint16 = gather(axis = gather_35_axis_0, batch_dims = gather_35_batch_dims_0, indices = select_35_to_uint16, validate_indices = gather_35_validate_indices_0, x = var_705_shape_cast_fp16_to_uint16)[name = string("gather_35_cast_uint16")]; + string gather_35_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_35_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_151_axes_0 = const()[name = string("expand_dims_151_axes_0"), val = tensor([0])]; + int32 gather_35_cast_uint16_to_int32 = cast(dtype = gather_35_cast_uint16_to_int32_dtype_0, x = gather_35_cast_uint16)[name = string("cast_80")]; + tensor expand_dims_151 = expand_dims(axes = expand_dims_151_axes_0, x = gather_35_cast_uint16_to_int32)[name = string("expand_dims_151")]; + tensor concat_110 = const()[name = string("concat_110"), val = tensor([17, 0, 0, 0])]; + tensor concat_111_values0_0 = const()[name = string("concat_111_values0_0"), val = tensor([0])]; + tensor concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor([0])]; + tensor concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor([0])]; + int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; + bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; + tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (concat_111_values0_0, concat_111_values1_0, expand_dims_151, concat_111_values3_0))[name = string("concat_111")]; + tensor v_cache2_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_110, begin_mask = v_cache2_internal_tensor_assign_18_begin_mask_0, end = concat_111, end_mask = v_cache2_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_18_stride_0, update = linear_35_cast_fp16, x = coreml_update_state_85)[name = string("v_cache2_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_18_cast_fp16, input = v_cache2)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache2)[name = string("coreml_update_state_87")]; + tensor var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97560128)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_727_to_fp16, x = audio_data)[name = string("linear_36_cast_fp16")]; + tensor var_731_to_fp16 = const()[name = string("op_731_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99657344)))]; + tensor var_732_to_fp16 = const()[name = string("op_732_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101754560)))]; + tensor linear_37_cast_fp16 = linear(bias = var_732_to_fp16, weight = var_731_to_fp16, x = audio_data)[name = string("linear_37_cast_fp16")]; + tensor var_734_shape_cast_fp16 = shape(x = linear_36_cast_fp16)[name = string("op_734_shape_cast_fp16")]; + int32 gather_36_axis_0 = const()[name = string("gather_36_axis_0"), val = int32(0)]; + int32 gather_36_batch_dims_0 = const()[name = string("gather_36_batch_dims_0"), val = int32(0)]; + bool gather_36_validate_indices_0 = const()[name = string("gather_36_validate_indices_0"), val = bool(false)]; + string var_734_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_734_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_36_to_uint16 = const()[name = string("select_36_to_uint16"), val = uint16(1)]; + tensor var_734_shape_cast_fp16_to_uint16 = cast(dtype = var_734_shape_cast_fp16_to_uint16_dtype_0, x = var_734_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_36_cast_uint16 = gather(axis = gather_36_axis_0, batch_dims = gather_36_batch_dims_0, indices = select_36_to_uint16, validate_indices = gather_36_validate_indices_0, x = var_734_shape_cast_fp16_to_uint16)[name = string("gather_36_cast_uint16")]; + string gather_36_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_36_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_155_axes_0 = const()[name = string("expand_dims_155_axes_0"), val = tensor([0])]; + int32 gather_36_cast_uint16_to_int32 = cast(dtype = gather_36_cast_uint16_to_int32_dtype_0, x = gather_36_cast_uint16)[name = string("cast_78")]; + tensor expand_dims_155 = expand_dims(axes = expand_dims_155_axes_0, x = gather_36_cast_uint16_to_int32)[name = string("expand_dims_155")]; + tensor concat_113 = const()[name = string("concat_113"), val = tensor([18, 0, 0, 0])]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([0])]; + tensor concat_114_values1_0 = const()[name = string("concat_114_values1_0"), val = tensor([0])]; + tensor concat_114_values3_0 = const()[name = string("concat_114_values3_0"), val = tensor([0])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, concat_114_values1_0, expand_dims_155, concat_114_values3_0))[name = string("concat_114")]; + tensor k_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_113, begin_mask = k_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_114, end_mask = k_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_19_stride_0, update = linear_36_cast_fp16, x = coreml_update_state_86)[name = string("k_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_19_cast_fp16, input = k_cache2)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache2)[name = string("coreml_update_state_88")]; + tensor var_739_shape_cast_fp16 = shape(x = linear_37_cast_fp16)[name = string("op_739_shape_cast_fp16")]; + int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; + int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; + bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; + string var_739_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_739_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(1)]; + tensor var_739_shape_cast_fp16_to_uint16 = cast(dtype = var_739_shape_cast_fp16_to_uint16_dtype_0, x = var_739_shape_cast_fp16)[name = string("cast_77")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_739_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; + string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_159_axes_0 = const()[name = string("expand_dims_159_axes_0"), val = tensor([0])]; + int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_76")]; + tensor expand_dims_159 = expand_dims(axes = expand_dims_159_axes_0, x = gather_37_cast_uint16_to_int32)[name = string("expand_dims_159")]; + tensor concat_116 = const()[name = string("concat_116"), val = tensor([18, 0, 0, 0])]; + tensor concat_117_values0_0 = const()[name = string("concat_117_values0_0"), val = tensor([0])]; + tensor concat_117_values1_0 = const()[name = string("concat_117_values1_0"), val = tensor([0])]; + tensor concat_117_values3_0 = const()[name = string("concat_117_values3_0"), val = tensor([0])]; + int32 concat_117_axis_0 = const()[name = string("concat_117_axis_0"), val = int32(0)]; + bool concat_117_interleave_0 = const()[name = string("concat_117_interleave_0"), val = bool(false)]; + tensor concat_117 = concat(axis = concat_117_axis_0, interleave = concat_117_interleave_0, values = (concat_117_values0_0, concat_117_values1_0, expand_dims_159, concat_117_values3_0))[name = string("concat_117")]; + tensor v_cache2_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_116, begin_mask = v_cache2_internal_tensor_assign_19_begin_mask_0, end = concat_117, end_mask = v_cache2_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_19_stride_0, update = linear_37_cast_fp16, x = coreml_update_state_87)[name = string("v_cache2_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_19_cast_fp16, input = v_cache2)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache2)[name = string("coreml_update_state_89")]; + tensor var_761_to_fp16 = const()[name = string("op_761_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101756672)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_761_to_fp16, x = audio_data)[name = string("linear_38_cast_fp16")]; + tensor var_765_to_fp16 = const()[name = string("op_765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103853888)))]; + tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105951104)))]; + tensor linear_39_cast_fp16 = linear(bias = var_766_to_fp16, weight = var_765_to_fp16, x = audio_data)[name = string("linear_39_cast_fp16")]; + tensor var_768_shape_cast_fp16 = shape(x = linear_38_cast_fp16)[name = string("op_768_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_768_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_768_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_768_shape_cast_fp16_to_uint16 = cast(dtype = var_768_shape_cast_fp16_to_uint16_dtype_0, x = var_768_shape_cast_fp16)[name = string("cast_75")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_768_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_74")]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = gather_38_cast_uint16_to_int32)[name = string("expand_dims_163")]; + tensor concat_119 = const()[name = string("concat_119"), val = tensor([19, 0, 0, 0])]; + tensor concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor([0])]; + tensor concat_120_values1_0 = const()[name = string("concat_120_values1_0"), val = tensor([0])]; + tensor concat_120_values3_0 = const()[name = string("concat_120_values3_0"), val = tensor([0])]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, concat_120_values1_0, expand_dims_163, concat_120_values3_0))[name = string("concat_120")]; + tensor k_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_119, begin_mask = k_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_120, end_mask = k_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_20_stride_0, update = linear_38_cast_fp16, x = coreml_update_state_88)[name = string("k_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_20_cast_fp16, input = k_cache2)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache2)[name = string("coreml_update_state_90")]; + tensor var_773_shape_cast_fp16 = shape(x = linear_39_cast_fp16)[name = string("op_773_shape_cast_fp16")]; + int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; + int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; + bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; + string var_773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(1)]; + tensor var_773_shape_cast_fp16_to_uint16 = cast(dtype = var_773_shape_cast_fp16_to_uint16_dtype_0, x = var_773_shape_cast_fp16)[name = string("cast_73")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_773_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; + string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_167_axes_0 = const()[name = string("expand_dims_167_axes_0"), val = tensor([0])]; + int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_72")]; + tensor expand_dims_167 = expand_dims(axes = expand_dims_167_axes_0, x = gather_39_cast_uint16_to_int32)[name = string("expand_dims_167")]; + tensor concat_122 = const()[name = string("concat_122"), val = tensor([19, 0, 0, 0])]; + tensor concat_123_values0_0 = const()[name = string("concat_123_values0_0"), val = tensor([0])]; + tensor concat_123_values1_0 = const()[name = string("concat_123_values1_0"), val = tensor([0])]; + tensor concat_123_values3_0 = const()[name = string("concat_123_values3_0"), val = tensor([0])]; + int32 concat_123_axis_0 = const()[name = string("concat_123_axis_0"), val = int32(0)]; + bool concat_123_interleave_0 = const()[name = string("concat_123_interleave_0"), val = bool(false)]; + tensor concat_123 = concat(axis = concat_123_axis_0, interleave = concat_123_interleave_0, values = (concat_123_values0_0, concat_123_values1_0, expand_dims_167, concat_123_values3_0))[name = string("concat_123")]; + tensor v_cache2_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_122, begin_mask = v_cache2_internal_tensor_assign_20_begin_mask_0, end = concat_123, end_mask = v_cache2_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_20_stride_0, update = linear_39_cast_fp16, x = coreml_update_state_89)[name = string("v_cache2_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_20_cast_fp16, input = v_cache2)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache2)[name = string("coreml_update_state_91")]; + tensor var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105953216)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_795_to_fp16, x = audio_data)[name = string("linear_40_cast_fp16")]; + tensor var_799_to_fp16 = const()[name = string("op_799_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108050432)))]; + tensor var_800_to_fp16 = const()[name = string("op_800_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110147648)))]; + tensor linear_41_cast_fp16 = linear(bias = var_800_to_fp16, weight = var_799_to_fp16, x = audio_data)[name = string("linear_41_cast_fp16")]; + tensor var_802_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_802_shape_cast_fp16")]; + int32 gather_40_axis_0 = const()[name = string("gather_40_axis_0"), val = int32(0)]; + int32 gather_40_batch_dims_0 = const()[name = string("gather_40_batch_dims_0"), val = int32(0)]; + bool gather_40_validate_indices_0 = const()[name = string("gather_40_validate_indices_0"), val = bool(false)]; + string var_802_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_802_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_40_to_uint16 = const()[name = string("select_40_to_uint16"), val = uint16(1)]; + tensor var_802_shape_cast_fp16_to_uint16 = cast(dtype = var_802_shape_cast_fp16_to_uint16_dtype_0, x = var_802_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_40_cast_uint16 = gather(axis = gather_40_axis_0, batch_dims = gather_40_batch_dims_0, indices = select_40_to_uint16, validate_indices = gather_40_validate_indices_0, x = var_802_shape_cast_fp16_to_uint16)[name = string("gather_40_cast_uint16")]; + string gather_40_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_40_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_171_axes_0 = const()[name = string("expand_dims_171_axes_0"), val = tensor([0])]; + int32 gather_40_cast_uint16_to_int32 = cast(dtype = gather_40_cast_uint16_to_int32_dtype_0, x = gather_40_cast_uint16)[name = string("cast_70")]; + tensor expand_dims_171 = expand_dims(axes = expand_dims_171_axes_0, x = gather_40_cast_uint16_to_int32)[name = string("expand_dims_171")]; + tensor concat_125 = const()[name = string("concat_125"), val = tensor([20, 0, 0, 0])]; + tensor concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = tensor([0])]; + tensor concat_126_values1_0 = const()[name = string("concat_126_values1_0"), val = tensor([0])]; + tensor concat_126_values3_0 = const()[name = string("concat_126_values3_0"), val = tensor([0])]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, concat_126_values1_0, expand_dims_171, concat_126_values3_0))[name = string("concat_126")]; + tensor k_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_125, begin_mask = k_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_126, end_mask = k_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_21_stride_0, update = linear_40_cast_fp16, x = coreml_update_state_90)[name = string("k_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_21_cast_fp16, input = k_cache2)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache2)[name = string("coreml_update_state_92")]; + tensor var_807_shape_cast_fp16 = shape(x = linear_41_cast_fp16)[name = string("op_807_shape_cast_fp16")]; + int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; + int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; + bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; + string var_807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; + tensor var_807_shape_cast_fp16_to_uint16 = cast(dtype = var_807_shape_cast_fp16_to_uint16_dtype_0, x = var_807_shape_cast_fp16)[name = string("cast_69")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_807_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; + string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_175_axes_0 = const()[name = string("expand_dims_175_axes_0"), val = tensor([0])]; + int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_68")]; + tensor expand_dims_175 = expand_dims(axes = expand_dims_175_axes_0, x = gather_41_cast_uint16_to_int32)[name = string("expand_dims_175")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([20, 0, 0, 0])]; + tensor concat_129_values0_0 = const()[name = string("concat_129_values0_0"), val = tensor([0])]; + tensor concat_129_values1_0 = const()[name = string("concat_129_values1_0"), val = tensor([0])]; + tensor concat_129_values3_0 = const()[name = string("concat_129_values3_0"), val = tensor([0])]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (concat_129_values0_0, concat_129_values1_0, expand_dims_175, concat_129_values3_0))[name = string("concat_129")]; + tensor v_cache2_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_128, begin_mask = v_cache2_internal_tensor_assign_21_begin_mask_0, end = concat_129, end_mask = v_cache2_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_21_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_91)[name = string("v_cache2_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_21_cast_fp16, input = v_cache2)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache2)[name = string("coreml_update_state_93")]; + tensor var_829_to_fp16 = const()[name = string("op_829_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110149760)))]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_829_to_fp16, x = audio_data)[name = string("linear_42_cast_fp16")]; + tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112246976)))]; + tensor var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114344192)))]; + tensor linear_43_cast_fp16 = linear(bias = var_834_to_fp16, weight = var_833_to_fp16, x = audio_data)[name = string("linear_43_cast_fp16")]; + tensor var_836_shape_cast_fp16 = shape(x = linear_42_cast_fp16)[name = string("op_836_shape_cast_fp16")]; + int32 gather_42_axis_0 = const()[name = string("gather_42_axis_0"), val = int32(0)]; + int32 gather_42_batch_dims_0 = const()[name = string("gather_42_batch_dims_0"), val = int32(0)]; + bool gather_42_validate_indices_0 = const()[name = string("gather_42_validate_indices_0"), val = bool(false)]; + string var_836_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_836_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_42_to_uint16 = const()[name = string("select_42_to_uint16"), val = uint16(1)]; + tensor var_836_shape_cast_fp16_to_uint16 = cast(dtype = var_836_shape_cast_fp16_to_uint16_dtype_0, x = var_836_shape_cast_fp16)[name = string("cast_67")]; + uint16 gather_42_cast_uint16 = gather(axis = gather_42_axis_0, batch_dims = gather_42_batch_dims_0, indices = select_42_to_uint16, validate_indices = gather_42_validate_indices_0, x = var_836_shape_cast_fp16_to_uint16)[name = string("gather_42_cast_uint16")]; + string gather_42_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_42_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + int32 gather_42_cast_uint16_to_int32 = cast(dtype = gather_42_cast_uint16_to_int32_dtype_0, x = gather_42_cast_uint16)[name = string("cast_66")]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = gather_42_cast_uint16_to_int32)[name = string("expand_dims_179")]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([21, 0, 0, 0])]; + tensor concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = tensor([0])]; + tensor concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = tensor([0])]; + tensor concat_132_values3_0 = const()[name = string("concat_132_values3_0"), val = tensor([0])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, expand_dims_179, concat_132_values3_0))[name = string("concat_132")]; + tensor k_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_131, begin_mask = k_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_132, end_mask = k_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_22_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_92)[name = string("k_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_22_cast_fp16, input = k_cache2)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = k_cache2)[name = string("coreml_update_state_94")]; + tensor var_841_shape_cast_fp16 = shape(x = linear_43_cast_fp16)[name = string("op_841_shape_cast_fp16")]; + int32 gather_43_axis_0 = const()[name = string("gather_43_axis_0"), val = int32(0)]; + int32 gather_43_batch_dims_0 = const()[name = string("gather_43_batch_dims_0"), val = int32(0)]; + bool gather_43_validate_indices_0 = const()[name = string("gather_43_validate_indices_0"), val = bool(false)]; + string var_841_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_841_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_43_to_uint16 = const()[name = string("select_43_to_uint16"), val = uint16(1)]; + tensor var_841_shape_cast_fp16_to_uint16 = cast(dtype = var_841_shape_cast_fp16_to_uint16_dtype_0, x = var_841_shape_cast_fp16)[name = string("cast_65")]; + uint16 gather_43_cast_uint16 = gather(axis = gather_43_axis_0, batch_dims = gather_43_batch_dims_0, indices = select_43_to_uint16, validate_indices = gather_43_validate_indices_0, x = var_841_shape_cast_fp16_to_uint16)[name = string("gather_43_cast_uint16")]; + string gather_43_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_43_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_183_axes_0 = const()[name = string("expand_dims_183_axes_0"), val = tensor([0])]; + int32 gather_43_cast_uint16_to_int32 = cast(dtype = gather_43_cast_uint16_to_int32_dtype_0, x = gather_43_cast_uint16)[name = string("cast_64")]; + tensor expand_dims_183 = expand_dims(axes = expand_dims_183_axes_0, x = gather_43_cast_uint16_to_int32)[name = string("expand_dims_183")]; + tensor concat_134 = const()[name = string("concat_134"), val = tensor([21, 0, 0, 0])]; + tensor concat_135_values0_0 = const()[name = string("concat_135_values0_0"), val = tensor([0])]; + tensor concat_135_values1_0 = const()[name = string("concat_135_values1_0"), val = tensor([0])]; + tensor concat_135_values3_0 = const()[name = string("concat_135_values3_0"), val = tensor([0])]; + int32 concat_135_axis_0 = const()[name = string("concat_135_axis_0"), val = int32(0)]; + bool concat_135_interleave_0 = const()[name = string("concat_135_interleave_0"), val = bool(false)]; + tensor concat_135 = concat(axis = concat_135_axis_0, interleave = concat_135_interleave_0, values = (concat_135_values0_0, concat_135_values1_0, expand_dims_183, concat_135_values3_0))[name = string("concat_135")]; + tensor v_cache2_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_134, begin_mask = v_cache2_internal_tensor_assign_22_begin_mask_0, end = concat_135, end_mask = v_cache2_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_22_stride_0, update = linear_43_cast_fp16, x = coreml_update_state_93)[name = string("v_cache2_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_22_cast_fp16, input = v_cache2)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = v_cache2)[name = string("coreml_update_state_95")]; + tensor var_863_to_fp16 = const()[name = string("op_863_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114346304)))]; + tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_863_to_fp16, x = audio_data)[name = string("linear_44_cast_fp16")]; + tensor var_867_to_fp16 = const()[name = string("op_867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116443520)))]; + tensor var_868_to_fp16 = const()[name = string("op_868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118540736)))]; + tensor linear_45_cast_fp16 = linear(bias = var_868_to_fp16, weight = var_867_to_fp16, x = audio_data)[name = string("linear_45_cast_fp16")]; + tensor var_870_shape_cast_fp16 = shape(x = linear_44_cast_fp16)[name = string("op_870_shape_cast_fp16")]; + int32 gather_44_axis_0 = const()[name = string("gather_44_axis_0"), val = int32(0)]; + int32 gather_44_batch_dims_0 = const()[name = string("gather_44_batch_dims_0"), val = int32(0)]; + bool gather_44_validate_indices_0 = const()[name = string("gather_44_validate_indices_0"), val = bool(false)]; + string var_870_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_870_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_44_to_uint16 = const()[name = string("select_44_to_uint16"), val = uint16(1)]; + tensor var_870_shape_cast_fp16_to_uint16 = cast(dtype = var_870_shape_cast_fp16_to_uint16_dtype_0, x = var_870_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_44_cast_uint16 = gather(axis = gather_44_axis_0, batch_dims = gather_44_batch_dims_0, indices = select_44_to_uint16, validate_indices = gather_44_validate_indices_0, x = var_870_shape_cast_fp16_to_uint16)[name = string("gather_44_cast_uint16")]; + string gather_44_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_44_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_187_axes_0 = const()[name = string("expand_dims_187_axes_0"), val = tensor([0])]; + int32 gather_44_cast_uint16_to_int32 = cast(dtype = gather_44_cast_uint16_to_int32_dtype_0, x = gather_44_cast_uint16)[name = string("cast_62")]; + tensor expand_dims_187 = expand_dims(axes = expand_dims_187_axes_0, x = gather_44_cast_uint16_to_int32)[name = string("expand_dims_187")]; + tensor concat_137 = const()[name = string("concat_137"), val = tensor([22, 0, 0, 0])]; + tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([0])]; + tensor concat_138_values1_0 = const()[name = string("concat_138_values1_0"), val = tensor([0])]; + tensor concat_138_values3_0 = const()[name = string("concat_138_values3_0"), val = tensor([0])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, concat_138_values1_0, expand_dims_187, concat_138_values3_0))[name = string("concat_138")]; + tensor k_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_137, begin_mask = k_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_138, end_mask = k_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_23_stride_0, update = linear_44_cast_fp16, x = coreml_update_state_94)[name = string("k_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_23_cast_fp16, input = k_cache2)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = k_cache2)[name = string("coreml_update_state_96")]; + tensor var_875_shape_cast_fp16 = shape(x = linear_45_cast_fp16)[name = string("op_875_shape_cast_fp16")]; + int32 gather_45_axis_0 = const()[name = string("gather_45_axis_0"), val = int32(0)]; + int32 gather_45_batch_dims_0 = const()[name = string("gather_45_batch_dims_0"), val = int32(0)]; + bool gather_45_validate_indices_0 = const()[name = string("gather_45_validate_indices_0"), val = bool(false)]; + string var_875_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_875_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_45_to_uint16 = const()[name = string("select_45_to_uint16"), val = uint16(1)]; + tensor var_875_shape_cast_fp16_to_uint16 = cast(dtype = var_875_shape_cast_fp16_to_uint16_dtype_0, x = var_875_shape_cast_fp16)[name = string("cast_61")]; + uint16 gather_45_cast_uint16 = gather(axis = gather_45_axis_0, batch_dims = gather_45_batch_dims_0, indices = select_45_to_uint16, validate_indices = gather_45_validate_indices_0, x = var_875_shape_cast_fp16_to_uint16)[name = string("gather_45_cast_uint16")]; + string gather_45_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_45_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_191_axes_0 = const()[name = string("expand_dims_191_axes_0"), val = tensor([0])]; + int32 gather_45_cast_uint16_to_int32 = cast(dtype = gather_45_cast_uint16_to_int32_dtype_0, x = gather_45_cast_uint16)[name = string("cast_60")]; + tensor expand_dims_191 = expand_dims(axes = expand_dims_191_axes_0, x = gather_45_cast_uint16_to_int32)[name = string("expand_dims_191")]; + tensor concat_140 = const()[name = string("concat_140"), val = tensor([22, 0, 0, 0])]; + tensor concat_141_values0_0 = const()[name = string("concat_141_values0_0"), val = tensor([0])]; + tensor concat_141_values1_0 = const()[name = string("concat_141_values1_0"), val = tensor([0])]; + tensor concat_141_values3_0 = const()[name = string("concat_141_values3_0"), val = tensor([0])]; + int32 concat_141_axis_0 = const()[name = string("concat_141_axis_0"), val = int32(0)]; + bool concat_141_interleave_0 = const()[name = string("concat_141_interleave_0"), val = bool(false)]; + tensor concat_141 = concat(axis = concat_141_axis_0, interleave = concat_141_interleave_0, values = (concat_141_values0_0, concat_141_values1_0, expand_dims_191, concat_141_values3_0))[name = string("concat_141")]; + tensor v_cache2_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_140, begin_mask = v_cache2_internal_tensor_assign_23_begin_mask_0, end = concat_141, end_mask = v_cache2_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_23_stride_0, update = linear_45_cast_fp16, x = coreml_update_state_95)[name = string("v_cache2_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_23_cast_fp16, input = v_cache2)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = v_cache2)[name = string("coreml_update_state_97")]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118542848)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_897_to_fp16, x = audio_data)[name = string("linear_46_cast_fp16")]; + tensor var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120640064)))]; + tensor var_902_to_fp16 = const()[name = string("op_902_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122737280)))]; + tensor linear_47_cast_fp16 = linear(bias = var_902_to_fp16, weight = var_901_to_fp16, x = audio_data)[name = string("linear_47_cast_fp16")]; + tensor var_904_shape_cast_fp16 = shape(x = linear_46_cast_fp16)[name = string("op_904_shape_cast_fp16")]; + int32 gather_46_axis_0 = const()[name = string("gather_46_axis_0"), val = int32(0)]; + int32 gather_46_batch_dims_0 = const()[name = string("gather_46_batch_dims_0"), val = int32(0)]; + bool gather_46_validate_indices_0 = const()[name = string("gather_46_validate_indices_0"), val = bool(false)]; + string var_904_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_904_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_46_to_uint16 = const()[name = string("select_46_to_uint16"), val = uint16(1)]; + tensor var_904_shape_cast_fp16_to_uint16 = cast(dtype = var_904_shape_cast_fp16_to_uint16_dtype_0, x = var_904_shape_cast_fp16)[name = string("cast_59")]; + uint16 gather_46_cast_uint16 = gather(axis = gather_46_axis_0, batch_dims = gather_46_batch_dims_0, indices = select_46_to_uint16, validate_indices = gather_46_validate_indices_0, x = var_904_shape_cast_fp16_to_uint16)[name = string("gather_46_cast_uint16")]; + string gather_46_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_46_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + int32 gather_46_cast_uint16_to_int32 = cast(dtype = gather_46_cast_uint16_to_int32_dtype_0, x = gather_46_cast_uint16)[name = string("cast_58")]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = gather_46_cast_uint16_to_int32)[name = string("expand_dims_195")]; + tensor concat_143 = const()[name = string("concat_143"), val = tensor([23, 0, 0, 0])]; + tensor concat_144_values0_0 = const()[name = string("concat_144_values0_0"), val = tensor([0])]; + tensor concat_144_values1_0 = const()[name = string("concat_144_values1_0"), val = tensor([0])]; + tensor concat_144_values3_0 = const()[name = string("concat_144_values3_0"), val = tensor([0])]; + int32 concat_144_axis_0 = const()[name = string("concat_144_axis_0"), val = int32(0)]; + bool concat_144_interleave_0 = const()[name = string("concat_144_interleave_0"), val = bool(false)]; + tensor concat_144 = concat(axis = concat_144_axis_0, interleave = concat_144_interleave_0, values = (concat_144_values0_0, concat_144_values1_0, expand_dims_195, concat_144_values3_0))[name = string("concat_144")]; + tensor k_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_143, begin_mask = k_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_144, end_mask = k_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_24_stride_0, update = linear_46_cast_fp16, x = coreml_update_state_96)[name = string("k_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_24_cast_fp16, input = k_cache2)[name = string("coreml_update_state_98_write_state")]; + tensor var_909_shape_cast_fp16 = shape(x = linear_47_cast_fp16)[name = string("op_909_shape_cast_fp16")]; + int32 gather_47_axis_0 = const()[name = string("gather_47_axis_0"), val = int32(0)]; + int32 gather_47_batch_dims_0 = const()[name = string("gather_47_batch_dims_0"), val = int32(0)]; + bool gather_47_validate_indices_0 = const()[name = string("gather_47_validate_indices_0"), val = bool(false)]; + string var_909_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_909_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_47_to_uint16 = const()[name = string("select_47_to_uint16"), val = uint16(1)]; + tensor var_909_shape_cast_fp16_to_uint16 = cast(dtype = var_909_shape_cast_fp16_to_uint16_dtype_0, x = var_909_shape_cast_fp16)[name = string("cast_57")]; + uint16 gather_47_cast_uint16 = gather(axis = gather_47_axis_0, batch_dims = gather_47_batch_dims_0, indices = select_47_to_uint16, validate_indices = gather_47_validate_indices_0, x = var_909_shape_cast_fp16_to_uint16)[name = string("gather_47_cast_uint16")]; + string gather_47_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_47_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_199_axes_0 = const()[name = string("expand_dims_199_axes_0"), val = tensor([0])]; + int32 gather_47_cast_uint16_to_int32 = cast(dtype = gather_47_cast_uint16_to_int32_dtype_0, x = gather_47_cast_uint16)[name = string("cast_56")]; + tensor expand_dims_199 = expand_dims(axes = expand_dims_199_axes_0, x = gather_47_cast_uint16_to_int32)[name = string("expand_dims_199")]; + tensor concat_146 = const()[name = string("concat_146"), val = tensor([23, 0, 0, 0])]; + tensor concat_147_values0_0 = const()[name = string("concat_147_values0_0"), val = tensor([0])]; + tensor concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = tensor([0])]; + tensor concat_147_values3_0 = const()[name = string("concat_147_values3_0"), val = tensor([0])]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (concat_147_values0_0, concat_147_values1_0, expand_dims_199, concat_147_values3_0))[name = string("concat_147")]; + tensor v_cache2_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_146, begin_mask = v_cache2_internal_tensor_assign_24_begin_mask_0, end = concat_147, end_mask = v_cache2_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_24_stride_0, update = linear_47_cast_fp16, x = coreml_update_state_97)[name = string("v_cache2_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_24_cast_fp16, input = v_cache2)[name = string("coreml_update_state_99_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/medium/decoder_first.mlmodelc/weights/weight.bin b/medium/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3560eb93dcc3d2a99a37ff03a423674a7a91bb3 --- /dev/null +++ b/medium/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770db5fbf7afe2a5ce6088c1627c9603a75521b8a4837407cabb6376e82f72e8 +size 122739392 diff --git a/medium/decoder_second.mlmodelc/analytics/coremldata.bin b/medium/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..d524f2e627d050bc8b5c74fe58764bfc5d7924f6 --- /dev/null +++ b/medium/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c52f37c878809dca96af63fc4747def6ddfc186e51d71ca0f84e8dd484c3db4 +size 243 diff --git a/medium/decoder_second.mlmodelc/coremldata.bin b/medium/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..0af718ba0162adfaaeb89faa081b5174a0cdeb8f --- /dev/null +++ b/medium/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250736439ebc0224e5d43bc2ea92855a070857d494c8bd3c4cfd92a2f4dc6985 +size 487 diff --git a/medium/decoder_second.mlmodelc/metadata.json b/medium/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d216bac534168e7860b10485c727ee9714477199 --- /dev/null +++ b/medium/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 193, + "Ios18.readState" : 50, + "Ios18.expandDims" : 25, + "Ios18.sub" : 1, + "Ios18.matmul" : 96, + "Ios18.gelu" : 24, + "Ios18.gather" : 27, + "Ios18.concat" : 122, + "Shape" : 26, + "Ios18.add" : 121, + "Ios18.sliceUpdate" : 96, + "Ios18.sliceByIndex" : 193, + "Ios18.layerNorm" : 73, + "Ios18.cast" : 52, + "Ios18.transpose" : 192, + "Ios18.writeState" : 48, + "Ios18.reshape" : 192, + "Ios18.softmax" : 48, + "Ios18.mul" : 96 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 448 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 448, 1024]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 448 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 448, 1024]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 1500, 1024]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 24 × 1 × 1500 × 1024)", + "shortDescription" : "", + "shape" : "[24, 1, 1500, 1024]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/medium/decoder_second.mlmodelc/model.mil b/medium/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..cd3d6fcbe379a4bc54b96124dd582600a3439cf0 --- /dev/null +++ b/medium/decoder_second.mlmodelc/model.mil @@ -0,0 +1,4738 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_62_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_62_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_62_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_62_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_62_shape_cast_fp16_to_int16 = cast(dtype = var_62_shape_cast_fp16_to_int16_dtype_0, x = var_62_shape_cast_fp16)[name = string("cast_298")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_62_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_66_shape = shape(x = token_data)[name = string("op_66_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_66_shape_to_uint16_dtype_0 = const()[name = string("op_66_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_66_shape_to_uint16 = cast(dtype = var_66_shape_to_uint16_dtype_0, x = var_66_shape)[name = string("cast_296")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_66_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_295")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_297")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_122_axis_0 = const()[name = string("op_122_axis_0"), val = int32(0)]; + int32 var_122_batch_dims_0 = const()[name = string("op_122_batch_dims_0"), val = int32(0)]; + bool var_122_validate_indices_0 = const()[name = string("op_122_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_122_cast_fp16 = gather(axis = var_122_axis_0, batch_dims = var_122_batch_dims_0, indices = token_data, validate_indices = var_122_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_122_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(1024)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_125_end_mask_0 = const()[name = string("op_125_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106219648)))]; + tensor var_125_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_125_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_125_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_122_cast_fp16, y = var_125_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 1024])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 1024])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 1024])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 1024])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_148 = const()[name = string("op_148"), val = int32(-1)]; + tensor var_166_axes_0 = const()[name = string("op_166_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107137216)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107139328)))]; + fp16 var_154_to_fp16 = const()[name = string("op_154_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_166_cast_fp16 = layer_norm(axes = var_166_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_166_cast_fp16")]; + tensor var_177_to_fp16 = const()[name = string("op_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107141440)))]; + tensor var_178_to_fp16 = const()[name = string("op_178_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109238656)))]; + tensor linear_0_cast_fp16 = linear(bias = var_178_to_fp16, weight = var_177_to_fp16, x = var_166_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109240768)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111337984)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_181_to_fp16, x = var_166_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_185_to_fp16 = const()[name = string("op_185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111340096)))]; + tensor var_186_to_fp16 = const()[name = string("op_186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113437312)))]; + tensor linear_2_cast_fp16 = linear(bias = var_186_to_fp16, weight = var_185_to_fp16, x = var_166_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_188_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_188_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_188_shape_cast_fp16_to_uint16 = cast(dtype = var_188_shape_cast_fp16_to_uint16_dtype_0, x = var_188_shape_cast_fp16)[name = string("cast_294")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_188_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_293")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_48 = read_state(input = k_cache1)[name = string("coreml_update_state_48")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_49 = read_state(input = v_cache1)[name = string("coreml_update_state_49")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(1024)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_204_begin_0 = const()[name = string("op_204_begin_0"), val = tensor([0, 0, 0])]; + tensor var_204_end_mask_0 = const()[name = string("op_204_end_mask_0"), val = tensor([true, false, true])]; + tensor var_204_cast_fp16 = slice_by_index(begin = var_204_begin_0, end = concat_10, end_mask = var_204_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor var_207_begin_0 = const()[name = string("op_207_begin_0"), val = tensor([0, 0, 0])]; + tensor var_207_end_mask_0 = const()[name = string("op_207_end_mask_0"), val = tensor([true, false, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = concat_10, end_mask = var_207_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_207_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 16, 64])]; + tensor var_217_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_217_cast_fp16")]; + tensor const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_217_cast_fp16, y = const_120_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 16, 64])]; + tensor var_224_cast_fp16 = reshape(shape = concat_13x, x = var_204_cast_fp16)[name = string("op_224_cast_fp16")]; + tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_224_cast_fp16, y = const_121_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 16, 64])]; + tensor var_231_cast_fp16 = reshape(shape = concat_14x, x = var_207_cast_fp16)[name = string("op_231_cast_fp16")]; + tensor var_232 = const()[name = string("op_232"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_193_perm_0 = const()[name = string("transpose_193_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_194_perm_0 = const()[name = string("transpose_194_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_194 = transpose(perm = transpose_194_perm_0, x = k_5_cast_fp16)[name = string("transpose_478")]; + tensor transpose_193 = transpose(perm = transpose_193_perm_0, x = q_3_cast_fp16)[name = string("transpose_479")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_193, y = transpose_194)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_235_begin_0 = const()[name = string("op_235_begin_0"), val = tensor([0, 0])]; + tensor var_235_end_mask_0 = const()[name = string("op_235_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113439424)))]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = concat_15, end_mask = var_235_end_mask_0, x = mask_to_fp16)[name = string("op_235_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_236_begin_0 = const()[name = string("op_236_begin_0"), val = tensor([0, 0])]; + tensor var_236_end_mask_0 = const()[name = string("op_236_end_mask_0"), val = tensor([true, false])]; + tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = concat_16, end_mask = var_236_end_mask_0, x = var_235_cast_fp16)[name = string("op_236_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_236_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_239_cast_fp16 = softmax(axis = var_148, x = qk_3_cast_fp16)[name = string("op_239_cast_fp16")]; + bool var_241_transpose_x_0 = const()[name = string("op_241_transpose_x_0"), val = bool(false)]; + bool var_241_transpose_y_0 = const()[name = string("op_241_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_232, x = var_231_cast_fp16)[name = string("transpose_480")]; + tensor var_241_cast_fp16 = matmul(transpose_x = var_241_transpose_x_0, transpose_y = var_241_transpose_y_0, x = var_239_cast_fp16, y = v_5_cast_fp16)[name = string("op_241_cast_fp16")]; + tensor var_242 = const()[name = string("op_242"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 1024])]; + tensor var_243_cast_fp16 = transpose(perm = var_242, x = var_241_cast_fp16)[name = string("transpose_477")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_243_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_247_to_fp16 = const()[name = string("op_247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113840896)))]; + tensor var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115938112)))]; + tensor linear_3_cast_fp16 = linear(bias = var_248_to_fp16, weight = var_247_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_255_axes_0 = const()[name = string("op_255_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115940224)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115942336)))]; + tensor var_255_cast_fp16 = layer_norm(axes = var_255_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_255_cast_fp16")]; + tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115944448)))]; + tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118041664)))]; + tensor linear_4_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = var_255_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118043776)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 16, 64])]; + tensor var_285_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_285_cast_fp16")]; + tensor const_122_to_fp16 = const()[name = string("const_122_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_285_cast_fp16, y = const_122_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_291 = const()[name = string("op_291"), val = tensor([1, 1500, 16, -1])]; + tensor var_292_cast_fp16 = reshape(shape = var_291, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_292_cast_fp16")]; + tensor const_123_to_fp16 = const()[name = string("const_123_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_292_cast_fp16, y = const_123_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_298 = const()[name = string("op_298"), val = tensor([1, 1500, 16, -1])]; + tensor var_299_cast_fp16 = reshape(shape = var_298, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_299_cast_fp16")]; + tensor var_300 = const()[name = string("op_300"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_195_perm_0 = const()[name = string("transpose_195_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_196_perm_0 = const()[name = string("transpose_196_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_196 = transpose(perm = transpose_196_perm_0, x = k_9_cast_fp16)[name = string("transpose_474")]; + tensor transpose_195 = transpose(perm = transpose_195_perm_0, x = q_7_cast_fp16)[name = string("transpose_475")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_195, y = transpose_196)[name = string("qk_5_cast_fp16")]; + tensor var_304_cast_fp16 = softmax(axis = var_148, x = qk_5_cast_fp16)[name = string("op_304_cast_fp16")]; + bool var_306_transpose_x_0 = const()[name = string("op_306_transpose_x_0"), val = bool(false)]; + bool var_306_transpose_y_0 = const()[name = string("op_306_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_300, x = var_299_cast_fp16)[name = string("transpose_476")]; + tensor var_306_cast_fp16 = matmul(transpose_x = var_306_transpose_x_0, transpose_y = var_306_transpose_y_0, x = var_304_cast_fp16, y = v_9_cast_fp16)[name = string("op_306_cast_fp16")]; + tensor var_307 = const()[name = string("op_307"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 1024])]; + tensor var_308_cast_fp16 = transpose(perm = var_307, x = var_306_cast_fp16)[name = string("transpose_473")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_308_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121115840)))]; + tensor var_313_to_fp16 = const()[name = string("op_313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123213056)))]; + tensor linear_5_cast_fp16 = linear(bias = var_313_to_fp16, weight = var_312_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123215168)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123217280)))]; + tensor var_320_cast_fp16 = layer_norm(axes = var_320_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_154_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_320_cast_fp16")]; + tensor var_329_to_fp16 = const()[name = string("op_329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123219392)))]; + tensor var_330_to_fp16 = const()[name = string("op_330_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131608064)))]; + tensor linear_6_cast_fp16 = linear(bias = var_330_to_fp16, weight = var_329_to_fp16, x = var_320_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_335_to_fp16 = const()[name = string("op_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131616320)))]; + tensor var_336_to_fp16 = const()[name = string("op_336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140004992)))]; + tensor linear_7_cast_fp16 = linear(bias = var_336_to_fp16, weight = var_335_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 1024])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_48)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 1024])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_49)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 1024])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 1024])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_359 = const()[name = string("op_359"), val = int32(-1)]; + tensor var_377_axes_0 = const()[name = string("op_377_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140007104)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140009216)))]; + fp16 var_365_to_fp16 = const()[name = string("op_365_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_377_cast_fp16 = layer_norm(axes = var_377_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_377_cast_fp16")]; + tensor var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140011328)))]; + tensor var_389_to_fp16 = const()[name = string("op_389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142108544)))]; + tensor linear_8_cast_fp16 = linear(bias = var_389_to_fp16, weight = var_388_to_fp16, x = var_377_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142110656)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_392_to_fp16, x = var_377_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_396_to_fp16 = const()[name = string("op_396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144207872)))]; + tensor var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146305088)))]; + tensor linear_10_cast_fp16 = linear(bias = var_397_to_fp16, weight = var_396_to_fp16, x = var_377_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_399_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_399_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_399_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_399_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_399_shape_cast_fp16_to_uint16 = cast(dtype = var_399_shape_cast_fp16_to_uint16_dtype_0, x = var_399_shape_cast_fp16)[name = string("cast_292")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_399_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_291")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_48)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_50 = read_state(input = k_cache1)[name = string("coreml_update_state_50")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_49)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_51 = read_state(input = v_cache1)[name = string("coreml_update_state_51")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(1024)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_415_begin_0 = const()[name = string("op_415_begin_0"), val = tensor([0, 0, 0])]; + tensor var_415_end_mask_0 = const()[name = string("op_415_end_mask_0"), val = tensor([true, false, true])]; + tensor var_415_cast_fp16 = slice_by_index(begin = var_415_begin_0, end = concat_32, end_mask = var_415_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_415_cast_fp16")]; + tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([0, 0, 0])]; + tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([true, false, true])]; + tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = concat_32, end_mask = var_418_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 16, 64])]; + tensor var_428_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_428_cast_fp16")]; + tensor const_124_to_fp16 = const()[name = string("const_124_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_428_cast_fp16, y = const_124_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 16, 64])]; + tensor var_435_cast_fp16 = reshape(shape = concat_35x, x = var_415_cast_fp16)[name = string("op_435_cast_fp16")]; + tensor const_125_to_fp16 = const()[name = string("const_125_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_435_cast_fp16, y = const_125_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 16, 64])]; + tensor var_442_cast_fp16 = reshape(shape = concat_36x, x = var_418_cast_fp16)[name = string("op_442_cast_fp16")]; + tensor var_443 = const()[name = string("op_443"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_197_perm_0 = const()[name = string("transpose_197_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_198_perm_0 = const()[name = string("transpose_198_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_198 = transpose(perm = transpose_198_perm_0, x = k_15_cast_fp16)[name = string("transpose_470")]; + tensor transpose_197 = transpose(perm = transpose_197_perm_0, x = q_11_cast_fp16)[name = string("transpose_471")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_197, y = transpose_198)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_446_begin_0 = const()[name = string("op_446_begin_0"), val = tensor([0, 0])]; + tensor var_446_end_mask_0 = const()[name = string("op_446_end_mask_0"), val = tensor([false, true])]; + tensor var_446_cast_fp16 = slice_by_index(begin = var_446_begin_0, end = concat_37, end_mask = var_446_end_mask_0, x = mask_to_fp16)[name = string("op_446_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_447_begin_0 = const()[name = string("op_447_begin_0"), val = tensor([0, 0])]; + tensor var_447_end_mask_0 = const()[name = string("op_447_end_mask_0"), val = tensor([true, false])]; + tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = concat_38, end_mask = var_447_end_mask_0, x = var_446_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_447_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_450_cast_fp16 = softmax(axis = var_359, x = qk_9_cast_fp16)[name = string("op_450_cast_fp16")]; + bool var_452_transpose_x_0 = const()[name = string("op_452_transpose_x_0"), val = bool(false)]; + bool var_452_transpose_y_0 = const()[name = string("op_452_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_443, x = var_442_cast_fp16)[name = string("transpose_472")]; + tensor var_452_cast_fp16 = matmul(transpose_x = var_452_transpose_x_0, transpose_y = var_452_transpose_y_0, x = var_450_cast_fp16, y = v_15_cast_fp16)[name = string("op_452_cast_fp16")]; + tensor var_453 = const()[name = string("op_453"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 1024])]; + tensor var_454_cast_fp16 = transpose(perm = var_453, x = var_452_cast_fp16)[name = string("transpose_469")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_454_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146307200)))]; + tensor var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148404416)))]; + tensor linear_11_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_466_axes_0 = const()[name = string("op_466_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148406528)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148408640)))]; + tensor var_466_cast_fp16 = layer_norm(axes = var_466_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_466_cast_fp16")]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148410752)))]; + tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150507968)))]; + tensor linear_12_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = var_466_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 16, 64])]; + tensor var_496_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_496_cast_fp16")]; + tensor const_126_to_fp16 = const()[name = string("const_126_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_496_cast_fp16, y = const_126_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_502 = const()[name = string("op_502"), val = tensor([1, 1500, 16, -1])]; + tensor var_503_cast_fp16 = reshape(shape = var_502, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_503_cast_fp16")]; + tensor const_127_to_fp16 = const()[name = string("const_127_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_503_cast_fp16, y = const_127_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_509 = const()[name = string("op_509"), val = tensor([1, 1500, 16, -1])]; + tensor var_510_cast_fp16 = reshape(shape = var_509, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_510_cast_fp16")]; + tensor var_511 = const()[name = string("op_511"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_199_perm_0 = const()[name = string("transpose_199_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_200_perm_0 = const()[name = string("transpose_200_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_200 = transpose(perm = transpose_200_perm_0, x = k_19_cast_fp16)[name = string("transpose_466")]; + tensor transpose_199 = transpose(perm = transpose_199_perm_0, x = q_15_cast_fp16)[name = string("transpose_467")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_199, y = transpose_200)[name = string("qk_11_cast_fp16")]; + tensor var_515_cast_fp16 = softmax(axis = var_359, x = qk_11_cast_fp16)[name = string("op_515_cast_fp16")]; + bool var_517_transpose_x_0 = const()[name = string("op_517_transpose_x_0"), val = bool(false)]; + bool var_517_transpose_y_0 = const()[name = string("op_517_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_511, x = var_510_cast_fp16)[name = string("transpose_468")]; + tensor var_517_cast_fp16 = matmul(transpose_x = var_517_transpose_x_0, transpose_y = var_517_transpose_y_0, x = var_515_cast_fp16, y = v_19_cast_fp16)[name = string("op_517_cast_fp16")]; + tensor var_518 = const()[name = string("op_518"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 1024])]; + tensor var_519_cast_fp16 = transpose(perm = var_518, x = var_517_cast_fp16)[name = string("transpose_465")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_519_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_523_to_fp16 = const()[name = string("op_523_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150510080)))]; + tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152607296)))]; + tensor linear_13_cast_fp16 = linear(bias = var_524_to_fp16, weight = var_523_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_531_axes_0 = const()[name = string("op_531_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152609408)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152611520)))]; + tensor var_531_cast_fp16 = layer_norm(axes = var_531_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_365_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_531_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = string("op_540_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152613632)))]; + tensor var_541_to_fp16 = const()[name = string("op_541_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161002304)))]; + tensor linear_14_cast_fp16 = linear(bias = var_541_to_fp16, weight = var_540_to_fp16, x = var_531_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_546_to_fp16 = const()[name = string("op_546_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161010560)))]; + tensor var_547_to_fp16 = const()[name = string("op_547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169399232)))]; + tensor linear_15_cast_fp16 = linear(bias = var_547_to_fp16, weight = var_546_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 1024])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_50)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 1024])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_51)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 1024])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 1024])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_570 = const()[name = string("op_570"), val = int32(-1)]; + tensor var_588_axes_0 = const()[name = string("op_588_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169401344)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169403456)))]; + fp16 var_576_to_fp16 = const()[name = string("op_576_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_588_cast_fp16")]; + tensor var_599_to_fp16 = const()[name = string("op_599_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169405568)))]; + tensor var_600_to_fp16 = const()[name = string("op_600_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171502784)))]; + tensor linear_16_cast_fp16 = linear(bias = var_600_to_fp16, weight = var_599_to_fp16, x = var_588_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171504896)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_603_to_fp16, x = var_588_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_607_to_fp16 = const()[name = string("op_607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173602112)))]; + tensor var_608_to_fp16 = const()[name = string("op_608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175699328)))]; + tensor linear_18_cast_fp16 = linear(bias = var_608_to_fp16, weight = var_607_to_fp16, x = var_588_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_610_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_610_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_610_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_610_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_610_shape_cast_fp16_to_uint16 = cast(dtype = var_610_shape_cast_fp16_to_uint16_dtype_0, x = var_610_shape_cast_fp16)[name = string("cast_290")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_610_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_289")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_50)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_52 = read_state(input = k_cache1)[name = string("coreml_update_state_52")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_51)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_53 = read_state(input = v_cache1)[name = string("coreml_update_state_53")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(1024)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor([0, 0, 0])]; + tensor var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor([true, false, true])]; + tensor var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = concat_54, end_mask = var_626_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_626_cast_fp16")]; + tensor var_629_begin_0 = const()[name = string("op_629_begin_0"), val = tensor([0, 0, 0])]; + tensor var_629_end_mask_0 = const()[name = string("op_629_end_mask_0"), val = tensor([true, false, true])]; + tensor var_629_cast_fp16 = slice_by_index(begin = var_629_begin_0, end = concat_54, end_mask = var_629_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_629_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 16, 64])]; + tensor var_639_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_639_cast_fp16")]; + tensor const_128_to_fp16 = const()[name = string("const_128_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_639_cast_fp16, y = const_128_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 16, 64])]; + tensor var_646_cast_fp16 = reshape(shape = concat_57x, x = var_626_cast_fp16)[name = string("op_646_cast_fp16")]; + tensor const_129_to_fp16 = const()[name = string("const_129_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_646_cast_fp16, y = const_129_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 16, 64])]; + tensor var_653_cast_fp16 = reshape(shape = concat_58x, x = var_629_cast_fp16)[name = string("op_653_cast_fp16")]; + tensor var_654 = const()[name = string("op_654"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_201_perm_0 = const()[name = string("transpose_201_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_202_perm_0 = const()[name = string("transpose_202_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_202 = transpose(perm = transpose_202_perm_0, x = k_25_cast_fp16)[name = string("transpose_462")]; + tensor transpose_201 = transpose(perm = transpose_201_perm_0, x = q_19_cast_fp16)[name = string("transpose_463")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_201, y = transpose_202)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_657_begin_0 = const()[name = string("op_657_begin_0"), val = tensor([0, 0])]; + tensor var_657_end_mask_0 = const()[name = string("op_657_end_mask_0"), val = tensor([false, true])]; + tensor var_657_cast_fp16 = slice_by_index(begin = var_657_begin_0, end = concat_59, end_mask = var_657_end_mask_0, x = mask_to_fp16)[name = string("op_657_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_658_begin_0 = const()[name = string("op_658_begin_0"), val = tensor([0, 0])]; + tensor var_658_end_mask_0 = const()[name = string("op_658_end_mask_0"), val = tensor([true, false])]; + tensor var_658_cast_fp16 = slice_by_index(begin = var_658_begin_0, end = concat_60, end_mask = var_658_end_mask_0, x = var_657_cast_fp16)[name = string("op_658_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_658_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_661_cast_fp16 = softmax(axis = var_570, x = qk_15_cast_fp16)[name = string("op_661_cast_fp16")]; + bool var_663_transpose_x_0 = const()[name = string("op_663_transpose_x_0"), val = bool(false)]; + bool var_663_transpose_y_0 = const()[name = string("op_663_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_654, x = var_653_cast_fp16)[name = string("transpose_464")]; + tensor var_663_cast_fp16 = matmul(transpose_x = var_663_transpose_x_0, transpose_y = var_663_transpose_y_0, x = var_661_cast_fp16, y = v_25_cast_fp16)[name = string("op_663_cast_fp16")]; + tensor var_664 = const()[name = string("op_664"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 1024])]; + tensor var_665_cast_fp16 = transpose(perm = var_664, x = var_663_cast_fp16)[name = string("transpose_461")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_665_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175701440)))]; + tensor var_670_to_fp16 = const()[name = string("op_670_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177798656)))]; + tensor linear_19_cast_fp16 = linear(bias = var_670_to_fp16, weight = var_669_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_677_axes_0 = const()[name = string("op_677_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177800768)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177802880)))]; + tensor var_677_cast_fp16 = layer_norm(axes = var_677_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_677_cast_fp16")]; + tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177804992)))]; + tensor var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179902208)))]; + tensor linear_20_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = var_677_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 16, 64])]; + tensor var_707_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_707_cast_fp16")]; + tensor const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_707_cast_fp16, y = const_130_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_713 = const()[name = string("op_713"), val = tensor([1, 1500, 16, -1])]; + tensor var_714_cast_fp16 = reshape(shape = var_713, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_714_cast_fp16")]; + tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_714_cast_fp16, y = const_131_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_720 = const()[name = string("op_720"), val = tensor([1, 1500, 16, -1])]; + tensor var_721_cast_fp16 = reshape(shape = var_720, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_721_cast_fp16")]; + tensor var_722 = const()[name = string("op_722"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_203_perm_0 = const()[name = string("transpose_203_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_204_perm_0 = const()[name = string("transpose_204_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_204 = transpose(perm = transpose_204_perm_0, x = k_29_cast_fp16)[name = string("transpose_458")]; + tensor transpose_203 = transpose(perm = transpose_203_perm_0, x = q_23_cast_fp16)[name = string("transpose_459")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_203, y = transpose_204)[name = string("qk_17_cast_fp16")]; + tensor var_726_cast_fp16 = softmax(axis = var_570, x = qk_17_cast_fp16)[name = string("op_726_cast_fp16")]; + bool var_728_transpose_x_0 = const()[name = string("op_728_transpose_x_0"), val = bool(false)]; + bool var_728_transpose_y_0 = const()[name = string("op_728_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_722, x = var_721_cast_fp16)[name = string("transpose_460")]; + tensor var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = var_726_cast_fp16, y = v_29_cast_fp16)[name = string("op_728_cast_fp16")]; + tensor var_729 = const()[name = string("op_729"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 1024])]; + tensor var_730_cast_fp16 = transpose(perm = var_729, x = var_728_cast_fp16)[name = string("transpose_457")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_730_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179904320)))]; + tensor var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182001536)))]; + tensor linear_21_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_742_axes_0 = const()[name = string("op_742_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182003648)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182005760)))]; + tensor var_742_cast_fp16 = layer_norm(axes = var_742_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_576_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_742_cast_fp16")]; + tensor var_751_to_fp16 = const()[name = string("op_751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182007872)))]; + tensor var_752_to_fp16 = const()[name = string("op_752_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190396544)))]; + tensor linear_22_cast_fp16 = linear(bias = var_752_to_fp16, weight = var_751_to_fp16, x = var_742_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_757_to_fp16 = const()[name = string("op_757_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190404800)))]; + tensor var_758_to_fp16 = const()[name = string("op_758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198793472)))]; + tensor linear_23_cast_fp16 = linear(bias = var_758_to_fp16, weight = var_757_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 1024])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_52)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 1024])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_53)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 1024])]; + tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; + tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 1024])]; + tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; + int32 var_781 = const()[name = string("op_781"), val = int32(-1)]; + tensor var_799_axes_0 = const()[name = string("op_799_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198795584)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198797696)))]; + fp16 var_787_to_fp16 = const()[name = string("op_787_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_799_cast_fp16 = layer_norm(axes = var_799_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_799_cast_fp16")]; + tensor var_810_to_fp16 = const()[name = string("op_810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198799808)))]; + tensor var_811_to_fp16 = const()[name = string("op_811_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200897024)))]; + tensor linear_24_cast_fp16 = linear(bias = var_811_to_fp16, weight = var_810_to_fp16, x = var_799_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200899136)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_814_to_fp16, x = var_799_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_818_to_fp16 = const()[name = string("op_818_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202996352)))]; + tensor var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205093568)))]; + tensor linear_26_cast_fp16 = linear(bias = var_819_to_fp16, weight = var_818_to_fp16, x = var_799_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_821_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_821_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_821_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_821_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_821_shape_cast_fp16_to_uint16 = cast(dtype = var_821_shape_cast_fp16_to_uint16_dtype_0, x = var_821_shape_cast_fp16)[name = string("cast_288")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_821_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_287")]; + int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_52)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_54 = read_state(input = k_cache1)[name = string("coreml_update_state_54")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_53)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_55 = read_state(input = v_cache1)[name = string("coreml_update_state_55")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(1024)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; + tensor var_837_begin_0 = const()[name = string("op_837_begin_0"), val = tensor([0, 0, 0])]; + tensor var_837_end_mask_0 = const()[name = string("op_837_end_mask_0"), val = tensor([true, false, true])]; + tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = concat_76, end_mask = var_837_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_837_cast_fp16")]; + tensor var_840_begin_0 = const()[name = string("op_840_begin_0"), val = tensor([0, 0, 0])]; + tensor var_840_end_mask_0 = const()[name = string("op_840_end_mask_0"), val = tensor([true, false, true])]; + tensor var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = concat_76, end_mask = var_840_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 16, 64])]; + tensor var_850_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_850_cast_fp16")]; + tensor const_132_to_fp16 = const()[name = string("const_132_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_850_cast_fp16, y = const_132_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 16, 64])]; + tensor var_857_cast_fp16 = reshape(shape = concat_79x, x = var_837_cast_fp16)[name = string("op_857_cast_fp16")]; + tensor const_133_to_fp16 = const()[name = string("const_133_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_857_cast_fp16, y = const_133_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 16, 64])]; + tensor var_864_cast_fp16 = reshape(shape = concat_80x, x = var_840_cast_fp16)[name = string("op_864_cast_fp16")]; + tensor var_865 = const()[name = string("op_865"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_205_perm_0 = const()[name = string("transpose_205_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_206_perm_0 = const()[name = string("transpose_206_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_206 = transpose(perm = transpose_206_perm_0, x = k_35_cast_fp16)[name = string("transpose_454")]; + tensor transpose_205 = transpose(perm = transpose_205_perm_0, x = q_27_cast_fp16)[name = string("transpose_455")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_205, y = transpose_206)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_868_begin_0 = const()[name = string("op_868_begin_0"), val = tensor([0, 0])]; + tensor var_868_end_mask_0 = const()[name = string("op_868_end_mask_0"), val = tensor([false, true])]; + tensor var_868_cast_fp16 = slice_by_index(begin = var_868_begin_0, end = concat_81, end_mask = var_868_end_mask_0, x = mask_to_fp16)[name = string("op_868_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_869_begin_0 = const()[name = string("op_869_begin_0"), val = tensor([0, 0])]; + tensor var_869_end_mask_0 = const()[name = string("op_869_end_mask_0"), val = tensor([true, false])]; + tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = concat_82, end_mask = var_869_end_mask_0, x = var_868_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_869_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_872_cast_fp16 = softmax(axis = var_781, x = qk_21_cast_fp16)[name = string("op_872_cast_fp16")]; + bool var_874_transpose_x_0 = const()[name = string("op_874_transpose_x_0"), val = bool(false)]; + bool var_874_transpose_y_0 = const()[name = string("op_874_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_865, x = var_864_cast_fp16)[name = string("transpose_456")]; + tensor var_874_cast_fp16 = matmul(transpose_x = var_874_transpose_x_0, transpose_y = var_874_transpose_y_0, x = var_872_cast_fp16, y = v_35_cast_fp16)[name = string("op_874_cast_fp16")]; + tensor var_875 = const()[name = string("op_875"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 1024])]; + tensor var_876_cast_fp16 = transpose(perm = var_875, x = var_874_cast_fp16)[name = string("transpose_453")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_876_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_880_to_fp16 = const()[name = string("op_880_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205095680)))]; + tensor var_881_to_fp16 = const()[name = string("op_881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207192896)))]; + tensor linear_27_cast_fp16 = linear(bias = var_881_to_fp16, weight = var_880_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_888_axes_0 = const()[name = string("op_888_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207195008)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207197120)))]; + tensor var_888_cast_fp16 = layer_norm(axes = var_888_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_888_cast_fp16")]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207199232)))]; + tensor var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209296448)))]; + tensor linear_28_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = var_888_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 16, 64])]; + tensor var_918_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_918_cast_fp16")]; + tensor const_134_to_fp16 = const()[name = string("const_134_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_918_cast_fp16, y = const_134_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_924 = const()[name = string("op_924"), val = tensor([1, 1500, 16, -1])]; + tensor var_925_cast_fp16 = reshape(shape = var_924, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_925_cast_fp16")]; + tensor const_135_to_fp16 = const()[name = string("const_135_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_925_cast_fp16, y = const_135_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_931 = const()[name = string("op_931"), val = tensor([1, 1500, 16, -1])]; + tensor var_932_cast_fp16 = reshape(shape = var_931, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_932_cast_fp16")]; + tensor var_933 = const()[name = string("op_933"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_207_perm_0 = const()[name = string("transpose_207_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_208_perm_0 = const()[name = string("transpose_208_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_208 = transpose(perm = transpose_208_perm_0, x = k_39_cast_fp16)[name = string("transpose_450")]; + tensor transpose_207 = transpose(perm = transpose_207_perm_0, x = q_31_cast_fp16)[name = string("transpose_451")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_207, y = transpose_208)[name = string("qk_23_cast_fp16")]; + tensor var_937_cast_fp16 = softmax(axis = var_781, x = qk_23_cast_fp16)[name = string("op_937_cast_fp16")]; + bool var_939_transpose_x_0 = const()[name = string("op_939_transpose_x_0"), val = bool(false)]; + bool var_939_transpose_y_0 = const()[name = string("op_939_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_933, x = var_932_cast_fp16)[name = string("transpose_452")]; + tensor var_939_cast_fp16 = matmul(transpose_x = var_939_transpose_x_0, transpose_y = var_939_transpose_y_0, x = var_937_cast_fp16, y = v_39_cast_fp16)[name = string("op_939_cast_fp16")]; + tensor var_940 = const()[name = string("op_940"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 1024])]; + tensor var_941_cast_fp16 = transpose(perm = var_940, x = var_939_cast_fp16)[name = string("transpose_449")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_941_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209298560)))]; + tensor var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211395776)))]; + tensor linear_29_cast_fp16 = linear(bias = var_946_to_fp16, weight = var_945_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_953_axes_0 = const()[name = string("op_953_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211397888)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211400000)))]; + tensor var_953_cast_fp16 = layer_norm(axes = var_953_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_787_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_953_cast_fp16")]; + tensor var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211402112)))]; + tensor var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219790784)))]; + tensor linear_30_cast_fp16 = linear(bias = var_963_to_fp16, weight = var_962_to_fp16, x = var_953_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219799040)))]; + tensor var_969_to_fp16 = const()[name = string("op_969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228187712)))]; + tensor linear_31_cast_fp16 = linear(bias = var_969_to_fp16, weight = var_968_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 1024])]; + tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_54)[name = string("k_cache_17_cast_fp16")]; + tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 1024])]; + tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_55)[name = string("v_cache_17_cast_fp16")]; + tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 1024])]; + tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; + tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 1024])]; + tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; + int32 var_992 = const()[name = string("op_992"), val = int32(-1)]; + tensor var_1010_axes_0 = const()[name = string("op_1010_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228189824)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228191936)))]; + fp16 var_998_to_fp16 = const()[name = string("op_998_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1010_cast_fp16 = layer_norm(axes = var_1010_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_1010_cast_fp16")]; + tensor var_1021_to_fp16 = const()[name = string("op_1021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228194048)))]; + tensor var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230291264)))]; + tensor linear_32_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1010_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1025_to_fp16 = const()[name = string("op_1025_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230293376)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1025_to_fp16, x = var_1010_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor var_1029_to_fp16 = const()[name = string("op_1029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232390592)))]; + tensor var_1030_to_fp16 = const()[name = string("op_1030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234487808)))]; + tensor linear_34_cast_fp16 = linear(bias = var_1030_to_fp16, weight = var_1029_to_fp16, x = var_1010_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor var_1032_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_1032_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_1032_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1032_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_1032_shape_cast_fp16_to_uint16 = cast(dtype = var_1032_shape_cast_fp16_to_uint16_dtype_0, x = var_1032_shape_cast_fp16)[name = string("cast_286")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_1032_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_285")]; + int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; + tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; + tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_54)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_56 = read_state(input = k_cache1)[name = string("coreml_update_state_56")]; + tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_55)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_57 = read_state(input = v_cache1)[name = string("coreml_update_state_57")]; + int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; + int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(1024)]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; + tensor var_1048_begin_0 = const()[name = string("op_1048_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1048_end_mask_0 = const()[name = string("op_1048_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1048_cast_fp16 = slice_by_index(begin = var_1048_begin_0, end = concat_98, end_mask = var_1048_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1048_cast_fp16")]; + tensor var_1051_begin_0 = const()[name = string("op_1051_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1051_end_mask_0 = const()[name = string("op_1051_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1051_cast_fp16 = slice_by_index(begin = var_1051_begin_0, end = concat_98, end_mask = var_1051_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1051_cast_fp16")]; + tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 16, 64])]; + tensor var_1061_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1061_cast_fp16")]; + tensor const_136_to_fp16 = const()[name = string("const_136_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1061_cast_fp16, y = const_136_to_fp16)[name = string("q_35_cast_fp16")]; + tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 16, 64])]; + tensor var_1068_cast_fp16 = reshape(shape = concat_101x, x = var_1048_cast_fp16)[name = string("op_1068_cast_fp16")]; + tensor const_137_to_fp16 = const()[name = string("const_137_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_45_cast_fp16 = mul(x = var_1068_cast_fp16, y = const_137_to_fp16)[name = string("k_45_cast_fp16")]; + tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 16, 64])]; + tensor var_1075_cast_fp16 = reshape(shape = concat_102x, x = var_1051_cast_fp16)[name = string("op_1075_cast_fp16")]; + tensor var_1076 = const()[name = string("op_1076"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_209_perm_0 = const()[name = string("transpose_209_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_210_perm_0 = const()[name = string("transpose_210_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_210 = transpose(perm = transpose_210_perm_0, x = k_45_cast_fp16)[name = string("transpose_446")]; + tensor transpose_209 = transpose(perm = transpose_209_perm_0, x = q_35_cast_fp16)[name = string("transpose_447")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_209, y = transpose_210)[name = string("qk_25_cast_fp16")]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; + tensor var_1079_begin_0 = const()[name = string("op_1079_begin_0"), val = tensor([0, 0])]; + tensor var_1079_end_mask_0 = const()[name = string("op_1079_end_mask_0"), val = tensor([false, true])]; + tensor var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = concat_103, end_mask = var_1079_end_mask_0, x = mask_to_fp16)[name = string("op_1079_cast_fp16")]; + int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; + tensor var_1080_begin_0 = const()[name = string("op_1080_begin_0"), val = tensor([0, 0])]; + tensor var_1080_end_mask_0 = const()[name = string("op_1080_end_mask_0"), val = tensor([true, false])]; + tensor var_1080_cast_fp16 = slice_by_index(begin = var_1080_begin_0, end = concat_104, end_mask = var_1080_end_mask_0, x = var_1079_cast_fp16)[name = string("op_1080_cast_fp16")]; + tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1080_cast_fp16)[name = string("qk_27_cast_fp16")]; + tensor var_1083_cast_fp16 = softmax(axis = var_992, x = qk_27_cast_fp16)[name = string("op_1083_cast_fp16")]; + bool var_1085_transpose_x_0 = const()[name = string("op_1085_transpose_x_0"), val = bool(false)]; + bool var_1085_transpose_y_0 = const()[name = string("op_1085_transpose_y_0"), val = bool(false)]; + tensor v_45_cast_fp16 = transpose(perm = var_1076, x = var_1075_cast_fp16)[name = string("transpose_448")]; + tensor var_1085_cast_fp16 = matmul(transpose_x = var_1085_transpose_x_0, transpose_y = var_1085_transpose_y_0, x = var_1083_cast_fp16, y = v_45_cast_fp16)[name = string("op_1085_cast_fp16")]; + tensor var_1086 = const()[name = string("op_1086"), val = tensor([0, 2, 1, 3])]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 1024])]; + tensor var_1087_cast_fp16 = transpose(perm = var_1086, x = var_1085_cast_fp16)[name = string("transpose_445")]; + tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1087_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor var_1091_to_fp16 = const()[name = string("op_1091_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234489920)))]; + tensor var_1092_to_fp16 = const()[name = string("op_1092_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236587136)))]; + tensor linear_35_cast_fp16 = linear(bias = var_1092_to_fp16, weight = var_1091_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; + tensor var_1099_axes_0 = const()[name = string("op_1099_axes_0"), val = tensor([-1])]; + tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236589248)))]; + tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236591360)))]; + tensor var_1099_cast_fp16 = layer_norm(axes = var_1099_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1099_cast_fp16")]; + tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236593472)))]; + tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238690688)))]; + tensor linear_36_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = var_1099_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; + tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; + tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; + tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 16, 64])]; + tensor var_1129_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1129_cast_fp16")]; + tensor const_138_to_fp16 = const()[name = string("const_138_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1129_cast_fp16, y = const_138_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1135 = const()[name = string("op_1135"), val = tensor([1, 1500, 16, -1])]; + tensor var_1136_cast_fp16 = reshape(shape = var_1135, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1136_cast_fp16")]; + tensor const_139_to_fp16 = const()[name = string("const_139_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_49_cast_fp16 = mul(x = var_1136_cast_fp16, y = const_139_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_1142 = const()[name = string("op_1142"), val = tensor([1, 1500, 16, -1])]; + tensor var_1143_cast_fp16 = reshape(shape = var_1142, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1143_cast_fp16")]; + tensor var_1144 = const()[name = string("op_1144"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_211_perm_0 = const()[name = string("transpose_211_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_212_perm_0 = const()[name = string("transpose_212_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_212 = transpose(perm = transpose_212_perm_0, x = k_49_cast_fp16)[name = string("transpose_442")]; + tensor transpose_211 = transpose(perm = transpose_211_perm_0, x = q_39_cast_fp16)[name = string("transpose_443")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_211, y = transpose_212)[name = string("qk_29_cast_fp16")]; + tensor var_1148_cast_fp16 = softmax(axis = var_992, x = qk_29_cast_fp16)[name = string("op_1148_cast_fp16")]; + bool var_1150_transpose_x_0 = const()[name = string("op_1150_transpose_x_0"), val = bool(false)]; + bool var_1150_transpose_y_0 = const()[name = string("op_1150_transpose_y_0"), val = bool(false)]; + tensor v_49_cast_fp16 = transpose(perm = var_1144, x = var_1143_cast_fp16)[name = string("transpose_444")]; + tensor var_1150_cast_fp16 = matmul(transpose_x = var_1150_transpose_x_0, transpose_y = var_1150_transpose_y_0, x = var_1148_cast_fp16, y = v_49_cast_fp16)[name = string("op_1150_cast_fp16")]; + tensor var_1151 = const()[name = string("op_1151"), val = tensor([0, 2, 1, 3])]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 1024])]; + tensor var_1152_cast_fp16 = transpose(perm = var_1151, x = var_1150_cast_fp16)[name = string("transpose_441")]; + tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1152_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238692800)))]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240790016)))]; + tensor linear_37_cast_fp16 = linear(bias = var_1157_to_fp16, weight = var_1156_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; + tensor var_1164_axes_0 = const()[name = string("op_1164_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240792128)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240794240)))]; + tensor var_1164_cast_fp16 = layer_norm(axes = var_1164_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_998_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1164_cast_fp16")]; + tensor var_1173_to_fp16 = const()[name = string("op_1173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240796352)))]; + tensor var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249185024)))]; + tensor linear_38_cast_fp16 = linear(bias = var_1174_to_fp16, weight = var_1173_to_fp16, x = var_1164_cast_fp16)[name = string("linear_38_cast_fp16")]; + string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; + tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_1179_to_fp16 = const()[name = string("op_1179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249193280)))]; + tensor var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257581952)))]; + tensor linear_39_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; + tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 1024])]; + tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_56)[name = string("k_cache_21_cast_fp16")]; + tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 1024])]; + tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_57)[name = string("v_cache_21_cast_fp16")]; + tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 1024])]; + tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; + tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 1024])]; + tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; + int32 var_1203 = const()[name = string("op_1203"), val = int32(-1)]; + tensor var_1221_axes_0 = const()[name = string("op_1221_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257584064)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257586176)))]; + fp16 var_1209_to_fp16 = const()[name = string("op_1209_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1221_cast_fp16 = layer_norm(axes = var_1221_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1221_cast_fp16")]; + tensor var_1232_to_fp16 = const()[name = string("op_1232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(257588288)))]; + tensor var_1233_to_fp16 = const()[name = string("op_1233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259685504)))]; + tensor linear_40_cast_fp16 = linear(bias = var_1233_to_fp16, weight = var_1232_to_fp16, x = var_1221_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor var_1236_to_fp16 = const()[name = string("op_1236_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259687616)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1236_to_fp16, x = var_1221_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261784832)))]; + tensor var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263882048)))]; + tensor linear_42_cast_fp16 = linear(bias = var_1241_to_fp16, weight = var_1240_to_fp16, x = var_1221_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_1243_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1243_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1243_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1243_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1243_shape_cast_fp16_to_uint16 = cast(dtype = var_1243_shape_cast_fp16_to_uint16_dtype_0, x = var_1243_shape_cast_fp16)[name = string("cast_284")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1243_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_283")]; + int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; + tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; + tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; + tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_56)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_58 = read_state(input = k_cache1)[name = string("coreml_update_state_58")]; + tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_57)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_59 = read_state(input = v_cache1)[name = string("coreml_update_state_59")]; + int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; + int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(1024)]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; + tensor var_1259_begin_0 = const()[name = string("op_1259_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1259_end_mask_0 = const()[name = string("op_1259_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1259_cast_fp16 = slice_by_index(begin = var_1259_begin_0, end = concat_120, end_mask = var_1259_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1259_cast_fp16")]; + tensor var_1262_begin_0 = const()[name = string("op_1262_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1262_end_mask_0 = const()[name = string("op_1262_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = concat_120, end_mask = var_1262_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1262_cast_fp16")]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 16, 64])]; + tensor var_1272_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1272_cast_fp16")]; + tensor const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1272_cast_fp16, y = const_140_to_fp16)[name = string("q_43_cast_fp16")]; + tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 16, 64])]; + tensor var_1279_cast_fp16 = reshape(shape = concat_123x, x = var_1259_cast_fp16)[name = string("op_1279_cast_fp16")]; + tensor const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1279_cast_fp16, y = const_141_to_fp16)[name = string("k_55_cast_fp16")]; + tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 16, 64])]; + tensor var_1286_cast_fp16 = reshape(shape = concat_124x, x = var_1262_cast_fp16)[name = string("op_1286_cast_fp16")]; + tensor var_1287 = const()[name = string("op_1287"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_213_perm_0 = const()[name = string("transpose_213_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_214_perm_0 = const()[name = string("transpose_214_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_214 = transpose(perm = transpose_214_perm_0, x = k_55_cast_fp16)[name = string("transpose_438")]; + tensor transpose_213 = transpose(perm = transpose_213_perm_0, x = q_43_cast_fp16)[name = string("transpose_439")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_213, y = transpose_214)[name = string("qk_31_cast_fp16")]; + int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; + tensor var_1290_begin_0 = const()[name = string("op_1290_begin_0"), val = tensor([0, 0])]; + tensor var_1290_end_mask_0 = const()[name = string("op_1290_end_mask_0"), val = tensor([false, true])]; + tensor var_1290_cast_fp16 = slice_by_index(begin = var_1290_begin_0, end = concat_125, end_mask = var_1290_end_mask_0, x = mask_to_fp16)[name = string("op_1290_cast_fp16")]; + int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; + tensor var_1291_begin_0 = const()[name = string("op_1291_begin_0"), val = tensor([0, 0])]; + tensor var_1291_end_mask_0 = const()[name = string("op_1291_end_mask_0"), val = tensor([true, false])]; + tensor var_1291_cast_fp16 = slice_by_index(begin = var_1291_begin_0, end = concat_126, end_mask = var_1291_end_mask_0, x = var_1290_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1291_cast_fp16)[name = string("qk_33_cast_fp16")]; + tensor var_1294_cast_fp16 = softmax(axis = var_1203, x = qk_33_cast_fp16)[name = string("op_1294_cast_fp16")]; + bool var_1296_transpose_x_0 = const()[name = string("op_1296_transpose_x_0"), val = bool(false)]; + bool var_1296_transpose_y_0 = const()[name = string("op_1296_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1287, x = var_1286_cast_fp16)[name = string("transpose_440")]; + tensor var_1296_cast_fp16 = matmul(transpose_x = var_1296_transpose_x_0, transpose_y = var_1296_transpose_y_0, x = var_1294_cast_fp16, y = v_55_cast_fp16)[name = string("op_1296_cast_fp16")]; + tensor var_1297 = const()[name = string("op_1297"), val = tensor([0, 2, 1, 3])]; + tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 1024])]; + tensor var_1298_cast_fp16 = transpose(perm = var_1297, x = var_1296_cast_fp16)[name = string("transpose_437")]; + tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1298_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263884160)))]; + tensor var_1303_to_fp16 = const()[name = string("op_1303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265981376)))]; + tensor linear_43_cast_fp16 = linear(bias = var_1303_to_fp16, weight = var_1302_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_1310_axes_0 = const()[name = string("op_1310_axes_0"), val = tensor([-1])]; + tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265983488)))]; + tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265985600)))]; + tensor var_1310_cast_fp16 = layer_norm(axes = var_1310_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1310_cast_fp16")]; + tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265987712)))]; + tensor var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268084928)))]; + tensor linear_44_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = var_1310_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; + tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; + tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 16, 64])]; + tensor var_1340_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1340_cast_fp16")]; + tensor const_142_to_fp16 = const()[name = string("const_142_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1340_cast_fp16, y = const_142_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1346 = const()[name = string("op_1346"), val = tensor([1, 1500, 16, -1])]; + tensor var_1347_cast_fp16 = reshape(shape = var_1346, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1347_cast_fp16")]; + tensor const_143_to_fp16 = const()[name = string("const_143_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1347_cast_fp16, y = const_143_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1353 = const()[name = string("op_1353"), val = tensor([1, 1500, 16, -1])]; + tensor var_1354_cast_fp16 = reshape(shape = var_1353, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1354_cast_fp16")]; + tensor var_1355 = const()[name = string("op_1355"), val = tensor([0, 2, 1, 3])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_215_perm_0 = const()[name = string("transpose_215_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_216_perm_0 = const()[name = string("transpose_216_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_216 = transpose(perm = transpose_216_perm_0, x = k_59_cast_fp16)[name = string("transpose_434")]; + tensor transpose_215 = transpose(perm = transpose_215_perm_0, x = q_47_cast_fp16)[name = string("transpose_435")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_215, y = transpose_216)[name = string("qk_35_cast_fp16")]; + tensor var_1359_cast_fp16 = softmax(axis = var_1203, x = qk_35_cast_fp16)[name = string("op_1359_cast_fp16")]; + bool var_1361_transpose_x_0 = const()[name = string("op_1361_transpose_x_0"), val = bool(false)]; + bool var_1361_transpose_y_0 = const()[name = string("op_1361_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1355, x = var_1354_cast_fp16)[name = string("transpose_436")]; + tensor var_1361_cast_fp16 = matmul(transpose_x = var_1361_transpose_x_0, transpose_y = var_1361_transpose_y_0, x = var_1359_cast_fp16, y = v_59_cast_fp16)[name = string("op_1361_cast_fp16")]; + tensor var_1362 = const()[name = string("op_1362"), val = tensor([0, 2, 1, 3])]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 1024])]; + tensor var_1363_cast_fp16 = transpose(perm = var_1362, x = var_1361_cast_fp16)[name = string("transpose_433")]; + tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1363_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_1367_to_fp16 = const()[name = string("op_1367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268087040)))]; + tensor var_1368_to_fp16 = const()[name = string("op_1368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270184256)))]; + tensor linear_45_cast_fp16 = linear(bias = var_1368_to_fp16, weight = var_1367_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_1375_axes_0 = const()[name = string("op_1375_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270186368)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270188480)))]; + tensor var_1375_cast_fp16 = layer_norm(axes = var_1375_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1209_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1375_cast_fp16")]; + tensor var_1384_to_fp16 = const()[name = string("op_1384_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270190592)))]; + tensor var_1385_to_fp16 = const()[name = string("op_1385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278579264)))]; + tensor linear_46_cast_fp16 = linear(bias = var_1385_to_fp16, weight = var_1384_to_fp16, x = var_1375_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; + tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1390_to_fp16 = const()[name = string("op_1390_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278587520)))]; + tensor var_1391_to_fp16 = const()[name = string("op_1391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286976192)))]; + tensor linear_47_cast_fp16 = linear(bias = var_1391_to_fp16, weight = var_1390_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; + tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 1024])]; + tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_58)[name = string("k_cache_25_cast_fp16")]; + tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 1024])]; + tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_59)[name = string("v_cache_25_cast_fp16")]; + tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 1024])]; + tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; + tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 1024])]; + tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; + int32 var_1414 = const()[name = string("op_1414"), val = int32(-1)]; + tensor var_1432_axes_0 = const()[name = string("op_1432_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286978304)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286980416)))]; + fp16 var_1420_to_fp16 = const()[name = string("op_1420_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1432_cast_fp16 = layer_norm(axes = var_1432_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1432_cast_fp16")]; + tensor var_1443_to_fp16 = const()[name = string("op_1443_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286982528)))]; + tensor var_1444_to_fp16 = const()[name = string("op_1444_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289079744)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1444_to_fp16, weight = var_1443_to_fp16, x = var_1432_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1447_to_fp16 = const()[name = string("op_1447_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289081856)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1447_to_fp16, x = var_1432_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1451_to_fp16 = const()[name = string("op_1451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179072)))]; + tensor var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293276288)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1452_to_fp16, weight = var_1451_to_fp16, x = var_1432_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1454_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1454_shape_cast_fp16")]; + int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; + int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; + bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; + string var_1454_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1454_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; + tensor var_1454_shape_cast_fp16_to_uint16 = cast(dtype = var_1454_shape_cast_fp16_to_uint16_dtype_0, x = var_1454_shape_cast_fp16)[name = string("cast_282")]; + uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1454_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; + string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_281")]; + int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; + tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; + int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; + bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; + tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; + tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; + tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; + tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; + int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; + bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; + tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; + tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_58)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = k_cache1)[name = string("coreml_update_state_60")]; + tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_59)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = v_cache1)[name = string("coreml_update_state_61")]; + int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; + int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(1024)]; + int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; + bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; + tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; + tensor var_1470_begin_0 = const()[name = string("op_1470_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1470_end_mask_0 = const()[name = string("op_1470_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = concat_142, end_mask = var_1470_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1470_cast_fp16")]; + tensor var_1473_begin_0 = const()[name = string("op_1473_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1473_end_mask_0 = const()[name = string("op_1473_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1473_cast_fp16 = slice_by_index(begin = var_1473_begin_0, end = concat_142, end_mask = var_1473_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1473_cast_fp16")]; + tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 16, 64])]; + tensor var_1483_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1483_cast_fp16")]; + tensor const_144_to_fp16 = const()[name = string("const_144_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1483_cast_fp16, y = const_144_to_fp16)[name = string("q_51_cast_fp16")]; + tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 16, 64])]; + tensor var_1490_cast_fp16 = reshape(shape = concat_145x, x = var_1470_cast_fp16)[name = string("op_1490_cast_fp16")]; + tensor const_145_to_fp16 = const()[name = string("const_145_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_65_cast_fp16 = mul(x = var_1490_cast_fp16, y = const_145_to_fp16)[name = string("k_65_cast_fp16")]; + tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 16, 64])]; + tensor var_1497_cast_fp16 = reshape(shape = concat_146x, x = var_1473_cast_fp16)[name = string("op_1497_cast_fp16")]; + tensor var_1498 = const()[name = string("op_1498"), val = tensor([0, 2, 1, 3])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_217_perm_0 = const()[name = string("transpose_217_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_218_perm_0 = const()[name = string("transpose_218_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_218 = transpose(perm = transpose_218_perm_0, x = k_65_cast_fp16)[name = string("transpose_430")]; + tensor transpose_217 = transpose(perm = transpose_217_perm_0, x = q_51_cast_fp16)[name = string("transpose_431")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_217, y = transpose_218)[name = string("qk_37_cast_fp16")]; + int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; + tensor var_1501_begin_0 = const()[name = string("op_1501_begin_0"), val = tensor([0, 0])]; + tensor var_1501_end_mask_0 = const()[name = string("op_1501_end_mask_0"), val = tensor([false, true])]; + tensor var_1501_cast_fp16 = slice_by_index(begin = var_1501_begin_0, end = concat_147, end_mask = var_1501_end_mask_0, x = mask_to_fp16)[name = string("op_1501_cast_fp16")]; + int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; + tensor var_1502_begin_0 = const()[name = string("op_1502_begin_0"), val = tensor([0, 0])]; + tensor var_1502_end_mask_0 = const()[name = string("op_1502_end_mask_0"), val = tensor([true, false])]; + tensor var_1502_cast_fp16 = slice_by_index(begin = var_1502_begin_0, end = concat_148, end_mask = var_1502_end_mask_0, x = var_1501_cast_fp16)[name = string("op_1502_cast_fp16")]; + tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1502_cast_fp16)[name = string("qk_39_cast_fp16")]; + tensor var_1505_cast_fp16 = softmax(axis = var_1414, x = qk_39_cast_fp16)[name = string("op_1505_cast_fp16")]; + bool var_1507_transpose_x_0 = const()[name = string("op_1507_transpose_x_0"), val = bool(false)]; + bool var_1507_transpose_y_0 = const()[name = string("op_1507_transpose_y_0"), val = bool(false)]; + tensor v_65_cast_fp16 = transpose(perm = var_1498, x = var_1497_cast_fp16)[name = string("transpose_432")]; + tensor var_1507_cast_fp16 = matmul(transpose_x = var_1507_transpose_x_0, transpose_y = var_1507_transpose_y_0, x = var_1505_cast_fp16, y = v_65_cast_fp16)[name = string("op_1507_cast_fp16")]; + tensor var_1508 = const()[name = string("op_1508"), val = tensor([0, 2, 1, 3])]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 1024])]; + tensor var_1509_cast_fp16 = transpose(perm = var_1508, x = var_1507_cast_fp16)[name = string("transpose_429")]; + tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1509_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293278400)))]; + tensor var_1514_to_fp16 = const()[name = string("op_1514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295375616)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1514_to_fp16, weight = var_1513_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_1521_axes_0 = const()[name = string("op_1521_axes_0"), val = tensor([-1])]; + tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295377728)))]; + tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295379840)))]; + tensor var_1521_cast_fp16 = layer_norm(axes = var_1521_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1521_cast_fp16")]; + tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295381952)))]; + tensor var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297479168)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = var_1521_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; + tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; + tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; + tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; + tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 16, 64])]; + tensor var_1551_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1551_cast_fp16")]; + tensor const_146_to_fp16 = const()[name = string("const_146_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1551_cast_fp16, y = const_146_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1557 = const()[name = string("op_1557"), val = tensor([1, 1500, 16, -1])]; + tensor var_1558_cast_fp16 = reshape(shape = var_1557, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1558_cast_fp16")]; + tensor const_147_to_fp16 = const()[name = string("const_147_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_69_cast_fp16 = mul(x = var_1558_cast_fp16, y = const_147_to_fp16)[name = string("k_69_cast_fp16")]; + tensor var_1564 = const()[name = string("op_1564"), val = tensor([1, 1500, 16, -1])]; + tensor var_1565_cast_fp16 = reshape(shape = var_1564, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1565_cast_fp16")]; + tensor var_1566 = const()[name = string("op_1566"), val = tensor([0, 2, 1, 3])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_219_perm_0 = const()[name = string("transpose_219_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_220_perm_0 = const()[name = string("transpose_220_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_220 = transpose(perm = transpose_220_perm_0, x = k_69_cast_fp16)[name = string("transpose_426")]; + tensor transpose_219 = transpose(perm = transpose_219_perm_0, x = q_55_cast_fp16)[name = string("transpose_427")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_219, y = transpose_220)[name = string("qk_41_cast_fp16")]; + tensor var_1570_cast_fp16 = softmax(axis = var_1414, x = qk_41_cast_fp16)[name = string("op_1570_cast_fp16")]; + bool var_1572_transpose_x_0 = const()[name = string("op_1572_transpose_x_0"), val = bool(false)]; + bool var_1572_transpose_y_0 = const()[name = string("op_1572_transpose_y_0"), val = bool(false)]; + tensor v_69_cast_fp16 = transpose(perm = var_1566, x = var_1565_cast_fp16)[name = string("transpose_428")]; + tensor var_1572_cast_fp16 = matmul(transpose_x = var_1572_transpose_x_0, transpose_y = var_1572_transpose_y_0, x = var_1570_cast_fp16, y = v_69_cast_fp16)[name = string("op_1572_cast_fp16")]; + tensor var_1573 = const()[name = string("op_1573"), val = tensor([0, 2, 1, 3])]; + tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 1024])]; + tensor var_1574_cast_fp16 = transpose(perm = var_1573, x = var_1572_cast_fp16)[name = string("transpose_425")]; + tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1574_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1578_to_fp16 = const()[name = string("op_1578_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297481280)))]; + tensor var_1579_to_fp16 = const()[name = string("op_1579_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299578496)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1579_to_fp16, weight = var_1578_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1586_axes_0 = const()[name = string("op_1586_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299580608)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299582720)))]; + tensor var_1586_cast_fp16 = layer_norm(axes = var_1586_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1420_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1586_cast_fp16")]; + tensor var_1595_to_fp16 = const()[name = string("op_1595_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299584832)))]; + tensor var_1596_to_fp16 = const()[name = string("op_1596_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307973504)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1596_to_fp16, weight = var_1595_to_fp16, x = var_1586_cast_fp16)[name = string("linear_54_cast_fp16")]; + string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; + tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; + tensor var_1601_to_fp16 = const()[name = string("op_1601_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307981760)))]; + tensor var_1602_to_fp16 = const()[name = string("op_1602_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316370432)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1602_to_fp16, weight = var_1601_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 1024])]; + tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_60)[name = string("k_cache_29_cast_fp16")]; + tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 1024])]; + tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_61)[name = string("v_cache_29_cast_fp16")]; + tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 1024])]; + tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; + tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 1024])]; + tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; + int32 var_1625 = const()[name = string("op_1625"), val = int32(-1)]; + tensor var_1643_axes_0 = const()[name = string("op_1643_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316372544)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316374656)))]; + fp16 var_1631_to_fp16 = const()[name = string("op_1631_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1643_cast_fp16 = layer_norm(axes = var_1643_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1643_cast_fp16")]; + tensor var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316376768)))]; + tensor var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318473984)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1655_to_fp16, weight = var_1654_to_fp16, x = var_1643_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1658_to_fp16 = const()[name = string("op_1658_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318476096)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1658_to_fp16, x = var_1643_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor var_1662_to_fp16 = const()[name = string("op_1662_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320573312)))]; + tensor var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322670528)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1663_to_fp16, weight = var_1662_to_fp16, x = var_1643_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor var_1665_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1665_shape_cast_fp16")]; + int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; + int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; + bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; + string var_1665_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1665_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; + tensor var_1665_shape_cast_fp16_to_uint16 = cast(dtype = var_1665_shape_cast_fp16_to_uint16_dtype_0, x = var_1665_shape_cast_fp16)[name = string("cast_280")]; + uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1665_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; + string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_279")]; + int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; + tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; + tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_60)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = k_cache1)[name = string("coreml_update_state_62")]; + tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_61)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = v_cache1)[name = string("coreml_update_state_63")]; + int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; + int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(1024)]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; + tensor var_1681_begin_0 = const()[name = string("op_1681_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1681_end_mask_0 = const()[name = string("op_1681_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1681_cast_fp16 = slice_by_index(begin = var_1681_begin_0, end = concat_164, end_mask = var_1681_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1681_cast_fp16")]; + tensor var_1684_begin_0 = const()[name = string("op_1684_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1684_end_mask_0 = const()[name = string("op_1684_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1684_cast_fp16 = slice_by_index(begin = var_1684_begin_0, end = concat_164, end_mask = var_1684_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1684_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 16, 64])]; + tensor var_1694_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1694_cast_fp16")]; + tensor const_148_to_fp16 = const()[name = string("const_148_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1694_cast_fp16, y = const_148_to_fp16)[name = string("q_59_cast_fp16")]; + tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 16, 64])]; + tensor var_1701_cast_fp16 = reshape(shape = concat_167x, x = var_1681_cast_fp16)[name = string("op_1701_cast_fp16")]; + tensor const_149_to_fp16 = const()[name = string("const_149_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_1701_cast_fp16, y = const_149_to_fp16)[name = string("k_75_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 16, 64])]; + tensor var_1708_cast_fp16 = reshape(shape = concat_168x, x = var_1684_cast_fp16)[name = string("op_1708_cast_fp16")]; + tensor var_1709 = const()[name = string("op_1709"), val = tensor([0, 2, 1, 3])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_221_perm_0 = const()[name = string("transpose_221_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_222_perm_0 = const()[name = string("transpose_222_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_222 = transpose(perm = transpose_222_perm_0, x = k_75_cast_fp16)[name = string("transpose_422")]; + tensor transpose_221 = transpose(perm = transpose_221_perm_0, x = q_59_cast_fp16)[name = string("transpose_423")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_221, y = transpose_222)[name = string("qk_43_cast_fp16")]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; + tensor var_1712_begin_0 = const()[name = string("op_1712_begin_0"), val = tensor([0, 0])]; + tensor var_1712_end_mask_0 = const()[name = string("op_1712_end_mask_0"), val = tensor([false, true])]; + tensor var_1712_cast_fp16 = slice_by_index(begin = var_1712_begin_0, end = concat_169, end_mask = var_1712_end_mask_0, x = mask_to_fp16)[name = string("op_1712_cast_fp16")]; + int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; + tensor var_1713_begin_0 = const()[name = string("op_1713_begin_0"), val = tensor([0, 0])]; + tensor var_1713_end_mask_0 = const()[name = string("op_1713_end_mask_0"), val = tensor([true, false])]; + tensor var_1713_cast_fp16 = slice_by_index(begin = var_1713_begin_0, end = concat_170, end_mask = var_1713_end_mask_0, x = var_1712_cast_fp16)[name = string("op_1713_cast_fp16")]; + tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1713_cast_fp16)[name = string("qk_45_cast_fp16")]; + tensor var_1716_cast_fp16 = softmax(axis = var_1625, x = qk_45_cast_fp16)[name = string("op_1716_cast_fp16")]; + bool var_1718_transpose_x_0 = const()[name = string("op_1718_transpose_x_0"), val = bool(false)]; + bool var_1718_transpose_y_0 = const()[name = string("op_1718_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_1709, x = var_1708_cast_fp16)[name = string("transpose_424")]; + tensor var_1718_cast_fp16 = matmul(transpose_x = var_1718_transpose_x_0, transpose_y = var_1718_transpose_y_0, x = var_1716_cast_fp16, y = v_75_cast_fp16)[name = string("op_1718_cast_fp16")]; + tensor var_1719 = const()[name = string("op_1719"), val = tensor([0, 2, 1, 3])]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 1024])]; + tensor var_1720_cast_fp16 = transpose(perm = var_1719, x = var_1718_cast_fp16)[name = string("transpose_421")]; + tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1720_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1724_to_fp16 = const()[name = string("op_1724_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322672640)))]; + tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324769856)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1725_to_fp16, weight = var_1724_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor var_1732_axes_0 = const()[name = string("op_1732_axes_0"), val = tensor([-1])]; + tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324771968)))]; + tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324774080)))]; + tensor var_1732_cast_fp16 = layer_norm(axes = var_1732_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1732_cast_fp16")]; + tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324776192)))]; + tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326873408)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = var_1732_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; + tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; + tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 16, 64])]; + tensor var_1762_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1762_cast_fp16")]; + tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1762_cast_fp16, y = const_150_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1768 = const()[name = string("op_1768"), val = tensor([1, 1500, 16, -1])]; + tensor var_1769_cast_fp16 = reshape(shape = var_1768, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1769_cast_fp16")]; + tensor const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_1769_cast_fp16, y = const_151_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_1775 = const()[name = string("op_1775"), val = tensor([1, 1500, 16, -1])]; + tensor var_1776_cast_fp16 = reshape(shape = var_1775, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1776_cast_fp16")]; + tensor var_1777 = const()[name = string("op_1777"), val = tensor([0, 2, 1, 3])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_223_perm_0 = const()[name = string("transpose_223_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_224_perm_0 = const()[name = string("transpose_224_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_224 = transpose(perm = transpose_224_perm_0, x = k_79_cast_fp16)[name = string("transpose_418")]; + tensor transpose_223 = transpose(perm = transpose_223_perm_0, x = q_63_cast_fp16)[name = string("transpose_419")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_223, y = transpose_224)[name = string("qk_47_cast_fp16")]; + tensor var_1781_cast_fp16 = softmax(axis = var_1625, x = qk_47_cast_fp16)[name = string("op_1781_cast_fp16")]; + bool var_1783_transpose_x_0 = const()[name = string("op_1783_transpose_x_0"), val = bool(false)]; + bool var_1783_transpose_y_0 = const()[name = string("op_1783_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_1777, x = var_1776_cast_fp16)[name = string("transpose_420")]; + tensor var_1783_cast_fp16 = matmul(transpose_x = var_1783_transpose_x_0, transpose_y = var_1783_transpose_y_0, x = var_1781_cast_fp16, y = v_79_cast_fp16)[name = string("op_1783_cast_fp16")]; + tensor var_1784 = const()[name = string("op_1784"), val = tensor([0, 2, 1, 3])]; + tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 1024])]; + tensor var_1785_cast_fp16 = transpose(perm = var_1784, x = var_1783_cast_fp16)[name = string("transpose_417")]; + tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1785_cast_fp16)[name = string("x_139_cast_fp16")]; + tensor var_1789_to_fp16 = const()[name = string("op_1789_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326875520)))]; + tensor var_1790_to_fp16 = const()[name = string("op_1790_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328972736)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1790_to_fp16, weight = var_1789_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; + tensor var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328974848)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328976960)))]; + tensor var_1797_cast_fp16 = layer_norm(axes = var_1797_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1631_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1797_cast_fp16")]; + tensor var_1806_to_fp16 = const()[name = string("op_1806_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328979072)))]; + tensor var_1807_to_fp16 = const()[name = string("op_1807_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337367744)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1807_to_fp16, weight = var_1806_to_fp16, x = var_1797_cast_fp16)[name = string("linear_62_cast_fp16")]; + string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; + tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1812_to_fp16 = const()[name = string("op_1812_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337376000)))]; + tensor var_1813_to_fp16 = const()[name = string("op_1813_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345764672)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1813_to_fp16, weight = var_1812_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 1024])]; + tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_62)[name = string("k_cache_33_cast_fp16")]; + tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 1024])]; + tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_63)[name = string("v_cache_33_cast_fp16")]; + tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 1024])]; + tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; + tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 1024])]; + tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; + int32 var_1836 = const()[name = string("op_1836"), val = int32(-1)]; + tensor var_1854_axes_0 = const()[name = string("op_1854_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345766784)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345768896)))]; + fp16 var_1842_to_fp16 = const()[name = string("op_1842_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1854_cast_fp16 = layer_norm(axes = var_1854_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1854_cast_fp16")]; + tensor var_1865_to_fp16 = const()[name = string("op_1865_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345771008)))]; + tensor var_1866_to_fp16 = const()[name = string("op_1866_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347868224)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1866_to_fp16, weight = var_1865_to_fp16, x = var_1854_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor var_1869_to_fp16 = const()[name = string("op_1869_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347870336)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1869_to_fp16, x = var_1854_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor var_1873_to_fp16 = const()[name = string("op_1873_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349967552)))]; + tensor var_1874_to_fp16 = const()[name = string("op_1874_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352064768)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1874_to_fp16, weight = var_1873_to_fp16, x = var_1854_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1876_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1876_shape_cast_fp16")]; + int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; + int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; + bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; + string var_1876_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1876_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; + tensor var_1876_shape_cast_fp16_to_uint16 = cast(dtype = var_1876_shape_cast_fp16_to_uint16_dtype_0, x = var_1876_shape_cast_fp16)[name = string("cast_278")]; + uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1876_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; + string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_277")]; + int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; + tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; + tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; + tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; + tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; + int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; + bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; + tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; + tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_62)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = k_cache1)[name = string("coreml_update_state_64")]; + tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_63)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = v_cache1)[name = string("coreml_update_state_65")]; + int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; + int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(1024)]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; + tensor var_1892_begin_0 = const()[name = string("op_1892_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1892_end_mask_0 = const()[name = string("op_1892_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1892_cast_fp16 = slice_by_index(begin = var_1892_begin_0, end = concat_186, end_mask = var_1892_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1892_cast_fp16")]; + tensor var_1895_begin_0 = const()[name = string("op_1895_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1895_end_mask_0 = const()[name = string("op_1895_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1895_cast_fp16 = slice_by_index(begin = var_1895_begin_0, end = concat_186, end_mask = var_1895_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1895_cast_fp16")]; + tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 16, 64])]; + tensor var_1905_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1905_cast_fp16")]; + tensor const_152_to_fp16 = const()[name = string("const_152_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1905_cast_fp16, y = const_152_to_fp16)[name = string("q_67_cast_fp16")]; + tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 16, 64])]; + tensor var_1912_cast_fp16 = reshape(shape = concat_189x, x = var_1892_cast_fp16)[name = string("op_1912_cast_fp16")]; + tensor const_153_to_fp16 = const()[name = string("const_153_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_85_cast_fp16 = mul(x = var_1912_cast_fp16, y = const_153_to_fp16)[name = string("k_85_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 16, 64])]; + tensor var_1919_cast_fp16 = reshape(shape = concat_190x, x = var_1895_cast_fp16)[name = string("op_1919_cast_fp16")]; + tensor var_1920 = const()[name = string("op_1920"), val = tensor([0, 2, 1, 3])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_225_perm_0 = const()[name = string("transpose_225_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_226_perm_0 = const()[name = string("transpose_226_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_226 = transpose(perm = transpose_226_perm_0, x = k_85_cast_fp16)[name = string("transpose_414")]; + tensor transpose_225 = transpose(perm = transpose_225_perm_0, x = q_67_cast_fp16)[name = string("transpose_415")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_225, y = transpose_226)[name = string("qk_49_cast_fp16")]; + int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; + int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; + bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; + tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; + tensor var_1923_begin_0 = const()[name = string("op_1923_begin_0"), val = tensor([0, 0])]; + tensor var_1923_end_mask_0 = const()[name = string("op_1923_end_mask_0"), val = tensor([false, true])]; + tensor var_1923_cast_fp16 = slice_by_index(begin = var_1923_begin_0, end = concat_191, end_mask = var_1923_end_mask_0, x = mask_to_fp16)[name = string("op_1923_cast_fp16")]; + int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; + tensor var_1924_begin_0 = const()[name = string("op_1924_begin_0"), val = tensor([0, 0])]; + tensor var_1924_end_mask_0 = const()[name = string("op_1924_end_mask_0"), val = tensor([true, false])]; + tensor var_1924_cast_fp16 = slice_by_index(begin = var_1924_begin_0, end = concat_192, end_mask = var_1924_end_mask_0, x = var_1923_cast_fp16)[name = string("op_1924_cast_fp16")]; + tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1924_cast_fp16)[name = string("qk_51_cast_fp16")]; + tensor var_1927_cast_fp16 = softmax(axis = var_1836, x = qk_51_cast_fp16)[name = string("op_1927_cast_fp16")]; + bool var_1929_transpose_x_0 = const()[name = string("op_1929_transpose_x_0"), val = bool(false)]; + bool var_1929_transpose_y_0 = const()[name = string("op_1929_transpose_y_0"), val = bool(false)]; + tensor v_85_cast_fp16 = transpose(perm = var_1920, x = var_1919_cast_fp16)[name = string("transpose_416")]; + tensor var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = var_1927_cast_fp16, y = v_85_cast_fp16)[name = string("op_1929_cast_fp16")]; + tensor var_1930 = const()[name = string("op_1930"), val = tensor([0, 2, 1, 3])]; + tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 1024])]; + tensor var_1931_cast_fp16 = transpose(perm = var_1930, x = var_1929_cast_fp16)[name = string("transpose_413")]; + tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1931_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1935_to_fp16 = const()[name = string("op_1935_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352066880)))]; + tensor var_1936_to_fp16 = const()[name = string("op_1936_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354164096)))]; + tensor linear_67_cast_fp16 = linear(bias = var_1936_to_fp16, weight = var_1935_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1943_axes_0 = const()[name = string("op_1943_axes_0"), val = tensor([-1])]; + tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354166208)))]; + tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354168320)))]; + tensor var_1943_cast_fp16 = layer_norm(axes = var_1943_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1943_cast_fp16")]; + tensor var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354170432)))]; + tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356267648)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = var_1943_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; + tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; + tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; + tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 16, 64])]; + tensor var_1973_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1973_cast_fp16")]; + tensor const_154_to_fp16 = const()[name = string("const_154_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_1973_cast_fp16, y = const_154_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_1979 = const()[name = string("op_1979"), val = tensor([1, 1500, 16, -1])]; + tensor var_1980_cast_fp16 = reshape(shape = var_1979, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1980_cast_fp16")]; + tensor const_155_to_fp16 = const()[name = string("const_155_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_89_cast_fp16 = mul(x = var_1980_cast_fp16, y = const_155_to_fp16)[name = string("k_89_cast_fp16")]; + tensor var_1986 = const()[name = string("op_1986"), val = tensor([1, 1500, 16, -1])]; + tensor var_1987_cast_fp16 = reshape(shape = var_1986, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1987_cast_fp16")]; + tensor var_1988 = const()[name = string("op_1988"), val = tensor([0, 2, 1, 3])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_227_perm_0 = const()[name = string("transpose_227_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_228_perm_0 = const()[name = string("transpose_228_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_228 = transpose(perm = transpose_228_perm_0, x = k_89_cast_fp16)[name = string("transpose_410")]; + tensor transpose_227 = transpose(perm = transpose_227_perm_0, x = q_71_cast_fp16)[name = string("transpose_411")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_227, y = transpose_228)[name = string("qk_53_cast_fp16")]; + tensor var_1992_cast_fp16 = softmax(axis = var_1836, x = qk_53_cast_fp16)[name = string("op_1992_cast_fp16")]; + bool var_1994_transpose_x_0 = const()[name = string("op_1994_transpose_x_0"), val = bool(false)]; + bool var_1994_transpose_y_0 = const()[name = string("op_1994_transpose_y_0"), val = bool(false)]; + tensor v_89_cast_fp16 = transpose(perm = var_1988, x = var_1987_cast_fp16)[name = string("transpose_412")]; + tensor var_1994_cast_fp16 = matmul(transpose_x = var_1994_transpose_x_0, transpose_y = var_1994_transpose_y_0, x = var_1992_cast_fp16, y = v_89_cast_fp16)[name = string("op_1994_cast_fp16")]; + tensor var_1995 = const()[name = string("op_1995"), val = tensor([0, 2, 1, 3])]; + tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 1024])]; + tensor var_1996_cast_fp16 = transpose(perm = var_1995, x = var_1994_cast_fp16)[name = string("transpose_409")]; + tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1996_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356269760)))]; + tensor var_2001_to_fp16 = const()[name = string("op_2001_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358366976)))]; + tensor linear_69_cast_fp16 = linear(bias = var_2001_to_fp16, weight = var_2000_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_2008_axes_0 = const()[name = string("op_2008_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358369088)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358371200)))]; + tensor var_2008_cast_fp16 = layer_norm(axes = var_2008_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1842_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_2008_cast_fp16")]; + tensor var_2017_to_fp16 = const()[name = string("op_2017_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358373312)))]; + tensor var_2018_to_fp16 = const()[name = string("op_2018_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366761984)))]; + tensor linear_70_cast_fp16 = linear(bias = var_2018_to_fp16, weight = var_2017_to_fp16, x = var_2008_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; + tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_2023_to_fp16 = const()[name = string("op_2023_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366770240)))]; + tensor var_2024_to_fp16 = const()[name = string("op_2024_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375158912)))]; + tensor linear_71_cast_fp16 = linear(bias = var_2024_to_fp16, weight = var_2023_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 1024])]; + tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_64)[name = string("k_cache_37_cast_fp16")]; + tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 1024])]; + tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_65)[name = string("v_cache_37_cast_fp16")]; + tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 1024])]; + tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; + tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 1024])]; + tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; + int32 var_2047 = const()[name = string("op_2047"), val = int32(-1)]; + tensor var_2065_axes_0 = const()[name = string("op_2065_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375161024)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375163136)))]; + fp16 var_2053_to_fp16 = const()[name = string("op_2053_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2065_cast_fp16 = layer_norm(axes = var_2065_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2065_cast_fp16")]; + tensor var_2076_to_fp16 = const()[name = string("op_2076_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375165248)))]; + tensor var_2077_to_fp16 = const()[name = string("op_2077_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377262464)))]; + tensor linear_72_cast_fp16 = linear(bias = var_2077_to_fp16, weight = var_2076_to_fp16, x = var_2065_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_2080_to_fp16 = const()[name = string("op_2080_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377264576)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2080_to_fp16, x = var_2065_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_2084_to_fp16 = const()[name = string("op_2084_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379361792)))]; + tensor var_2085_to_fp16 = const()[name = string("op_2085_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381459008)))]; + tensor linear_74_cast_fp16 = linear(bias = var_2085_to_fp16, weight = var_2084_to_fp16, x = var_2065_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2087_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2087_shape_cast_fp16")]; + int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; + int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; + bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; + string var_2087_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2087_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; + tensor var_2087_shape_cast_fp16_to_uint16 = cast(dtype = var_2087_shape_cast_fp16_to_uint16_dtype_0, x = var_2087_shape_cast_fp16)[name = string("cast_276")]; + uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2087_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; + string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_275")]; + int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; + tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; + tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; + tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_64)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = k_cache1)[name = string("coreml_update_state_66")]; + tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_65)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = v_cache1)[name = string("coreml_update_state_67")]; + int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; + int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(1024)]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; + tensor var_2103_begin_0 = const()[name = string("op_2103_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2103_end_mask_0 = const()[name = string("op_2103_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2103_cast_fp16 = slice_by_index(begin = var_2103_begin_0, end = concat_208, end_mask = var_2103_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2103_cast_fp16")]; + tensor var_2106_begin_0 = const()[name = string("op_2106_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2106_end_mask_0 = const()[name = string("op_2106_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = concat_208, end_mask = var_2106_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2106_cast_fp16")]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 16, 64])]; + tensor var_2116_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2116_cast_fp16")]; + tensor const_156_to_fp16 = const()[name = string("const_156_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2116_cast_fp16, y = const_156_to_fp16)[name = string("q_75_cast_fp16")]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 16, 64])]; + tensor var_2123_cast_fp16 = reshape(shape = concat_211x, x = var_2103_cast_fp16)[name = string("op_2123_cast_fp16")]; + tensor const_157_to_fp16 = const()[name = string("const_157_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2123_cast_fp16, y = const_157_to_fp16)[name = string("k_95_cast_fp16")]; + tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 16, 64])]; + tensor var_2130_cast_fp16 = reshape(shape = concat_212x, x = var_2106_cast_fp16)[name = string("op_2130_cast_fp16")]; + tensor var_2131 = const()[name = string("op_2131"), val = tensor([0, 2, 1, 3])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_229_perm_0 = const()[name = string("transpose_229_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_230_perm_0 = const()[name = string("transpose_230_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_230 = transpose(perm = transpose_230_perm_0, x = k_95_cast_fp16)[name = string("transpose_406")]; + tensor transpose_229 = transpose(perm = transpose_229_perm_0, x = q_75_cast_fp16)[name = string("transpose_407")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_229, y = transpose_230)[name = string("qk_55_cast_fp16")]; + int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; + int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; + bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; + tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; + tensor var_2134_begin_0 = const()[name = string("op_2134_begin_0"), val = tensor([0, 0])]; + tensor var_2134_end_mask_0 = const()[name = string("op_2134_end_mask_0"), val = tensor([false, true])]; + tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = concat_213, end_mask = var_2134_end_mask_0, x = mask_to_fp16)[name = string("op_2134_cast_fp16")]; + int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; + tensor var_2135_begin_0 = const()[name = string("op_2135_begin_0"), val = tensor([0, 0])]; + tensor var_2135_end_mask_0 = const()[name = string("op_2135_end_mask_0"), val = tensor([true, false])]; + tensor var_2135_cast_fp16 = slice_by_index(begin = var_2135_begin_0, end = concat_214, end_mask = var_2135_end_mask_0, x = var_2134_cast_fp16)[name = string("op_2135_cast_fp16")]; + tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2135_cast_fp16)[name = string("qk_57_cast_fp16")]; + tensor var_2138_cast_fp16 = softmax(axis = var_2047, x = qk_57_cast_fp16)[name = string("op_2138_cast_fp16")]; + bool var_2140_transpose_x_0 = const()[name = string("op_2140_transpose_x_0"), val = bool(false)]; + bool var_2140_transpose_y_0 = const()[name = string("op_2140_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2131, x = var_2130_cast_fp16)[name = string("transpose_408")]; + tensor var_2140_cast_fp16 = matmul(transpose_x = var_2140_transpose_x_0, transpose_y = var_2140_transpose_y_0, x = var_2138_cast_fp16, y = v_95_cast_fp16)[name = string("op_2140_cast_fp16")]; + tensor var_2141 = const()[name = string("op_2141"), val = tensor([0, 2, 1, 3])]; + tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 1024])]; + tensor var_2142_cast_fp16 = transpose(perm = var_2141, x = var_2140_cast_fp16)[name = string("transpose_405")]; + tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2142_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_2146_to_fp16 = const()[name = string("op_2146_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381461120)))]; + tensor var_2147_to_fp16 = const()[name = string("op_2147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383558336)))]; + tensor linear_75_cast_fp16 = linear(bias = var_2147_to_fp16, weight = var_2146_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; + tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([-1])]; + tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383560448)))]; + tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383562560)))]; + tensor var_2154_cast_fp16 = layer_norm(axes = var_2154_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2154_cast_fp16")]; + tensor var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383564672)))]; + tensor var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385661888)))]; + tensor linear_76_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = var_2154_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; + tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; + tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; + tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; + tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 16, 64])]; + tensor var_2184_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2184_cast_fp16")]; + tensor const_158_to_fp16 = const()[name = string("const_158_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2184_cast_fp16, y = const_158_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2190 = const()[name = string("op_2190"), val = tensor([1, 1500, 16, -1])]; + tensor var_2191_cast_fp16 = reshape(shape = var_2190, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2191_cast_fp16")]; + tensor const_159_to_fp16 = const()[name = string("const_159_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2191_cast_fp16, y = const_159_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2197 = const()[name = string("op_2197"), val = tensor([1, 1500, 16, -1])]; + tensor var_2198_cast_fp16 = reshape(shape = var_2197, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2198_cast_fp16")]; + tensor var_2199 = const()[name = string("op_2199"), val = tensor([0, 2, 1, 3])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_231_perm_0 = const()[name = string("transpose_231_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_232_perm_0 = const()[name = string("transpose_232_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_232 = transpose(perm = transpose_232_perm_0, x = k_99_cast_fp16)[name = string("transpose_402")]; + tensor transpose_231 = transpose(perm = transpose_231_perm_0, x = q_79_cast_fp16)[name = string("transpose_403")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_231, y = transpose_232)[name = string("qk_59_cast_fp16")]; + tensor var_2203_cast_fp16 = softmax(axis = var_2047, x = qk_59_cast_fp16)[name = string("op_2203_cast_fp16")]; + bool var_2205_transpose_x_0 = const()[name = string("op_2205_transpose_x_0"), val = bool(false)]; + bool var_2205_transpose_y_0 = const()[name = string("op_2205_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2199, x = var_2198_cast_fp16)[name = string("transpose_404")]; + tensor var_2205_cast_fp16 = matmul(transpose_x = var_2205_transpose_x_0, transpose_y = var_2205_transpose_y_0, x = var_2203_cast_fp16, y = v_99_cast_fp16)[name = string("op_2205_cast_fp16")]; + tensor var_2206 = const()[name = string("op_2206"), val = tensor([0, 2, 1, 3])]; + tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 1024])]; + tensor var_2207_cast_fp16 = transpose(perm = var_2206, x = var_2205_cast_fp16)[name = string("transpose_401")]; + tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2207_cast_fp16)[name = string("x_175_cast_fp16")]; + tensor var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385664000)))]; + tensor var_2212_to_fp16 = const()[name = string("op_2212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387761216)))]; + tensor linear_77_cast_fp16 = linear(bias = var_2212_to_fp16, weight = var_2211_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; + tensor var_2219_axes_0 = const()[name = string("op_2219_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387763328)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387765440)))]; + tensor var_2219_cast_fp16 = layer_norm(axes = var_2219_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2053_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2219_cast_fp16")]; + tensor var_2228_to_fp16 = const()[name = string("op_2228_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387767552)))]; + tensor var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396156224)))]; + tensor linear_78_cast_fp16 = linear(bias = var_2229_to_fp16, weight = var_2228_to_fp16, x = var_2219_cast_fp16)[name = string("linear_78_cast_fp16")]; + string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; + tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396164480)))]; + tensor var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404553152)))]; + tensor linear_79_cast_fp16 = linear(bias = var_2235_to_fp16, weight = var_2234_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; + tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 1024])]; + tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_66)[name = string("k_cache_41_cast_fp16")]; + tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 1024])]; + tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_67)[name = string("v_cache_41_cast_fp16")]; + tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 1024])]; + tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; + tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 1024])]; + tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; + int32 var_2258 = const()[name = string("op_2258"), val = int32(-1)]; + tensor var_2276_axes_0 = const()[name = string("op_2276_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404555264)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404557376)))]; + fp16 var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2276_cast_fp16 = layer_norm(axes = var_2276_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2276_cast_fp16")]; + tensor var_2287_to_fp16 = const()[name = string("op_2287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404559488)))]; + tensor var_2288_to_fp16 = const()[name = string("op_2288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406656704)))]; + tensor linear_80_cast_fp16 = linear(bias = var_2288_to_fp16, weight = var_2287_to_fp16, x = var_2276_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_2291_to_fp16 = const()[name = string("op_2291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406658816)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2291_to_fp16, x = var_2276_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408756032)))]; + tensor var_2296_to_fp16 = const()[name = string("op_2296_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410853248)))]; + tensor linear_82_cast_fp16 = linear(bias = var_2296_to_fp16, weight = var_2295_to_fp16, x = var_2276_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor var_2298_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2298_shape_cast_fp16")]; + int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; + int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; + bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; + string var_2298_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2298_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; + tensor var_2298_shape_cast_fp16_to_uint16 = cast(dtype = var_2298_shape_cast_fp16_to_uint16_dtype_0, x = var_2298_shape_cast_fp16)[name = string("cast_274")]; + uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2298_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; + string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_273")]; + int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; + tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; + tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; + tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; + tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; + int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; + bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; + tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; + tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_66)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = k_cache1)[name = string("coreml_update_state_68")]; + tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_67)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = v_cache1)[name = string("coreml_update_state_69")]; + int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; + int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(1024)]; + int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; + bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; + tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; + tensor var_2314_begin_0 = const()[name = string("op_2314_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2314_end_mask_0 = const()[name = string("op_2314_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = concat_230, end_mask = var_2314_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2314_cast_fp16")]; + tensor var_2317_begin_0 = const()[name = string("op_2317_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2317_end_mask_0 = const()[name = string("op_2317_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2317_cast_fp16 = slice_by_index(begin = var_2317_begin_0, end = concat_230, end_mask = var_2317_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2317_cast_fp16")]; + tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 16, 64])]; + tensor var_2327_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2327_cast_fp16")]; + tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2327_cast_fp16, y = const_160_to_fp16)[name = string("q_83_cast_fp16")]; + tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 16, 64])]; + tensor var_2334_cast_fp16 = reshape(shape = concat_233x, x = var_2314_cast_fp16)[name = string("op_2334_cast_fp16")]; + tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_105_cast_fp16 = mul(x = var_2334_cast_fp16, y = const_161_to_fp16)[name = string("k_105_cast_fp16")]; + tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 16, 64])]; + tensor var_2341_cast_fp16 = reshape(shape = concat_234x, x = var_2317_cast_fp16)[name = string("op_2341_cast_fp16")]; + tensor var_2342 = const()[name = string("op_2342"), val = tensor([0, 2, 1, 3])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_233_perm_0 = const()[name = string("transpose_233_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_234_perm_0 = const()[name = string("transpose_234_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_234 = transpose(perm = transpose_234_perm_0, x = k_105_cast_fp16)[name = string("transpose_398")]; + tensor transpose_233 = transpose(perm = transpose_233_perm_0, x = q_83_cast_fp16)[name = string("transpose_399")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_233, y = transpose_234)[name = string("qk_61_cast_fp16")]; + int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; + int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; + bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; + tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; + tensor var_2345_begin_0 = const()[name = string("op_2345_begin_0"), val = tensor([0, 0])]; + tensor var_2345_end_mask_0 = const()[name = string("op_2345_end_mask_0"), val = tensor([false, true])]; + tensor var_2345_cast_fp16 = slice_by_index(begin = var_2345_begin_0, end = concat_235, end_mask = var_2345_end_mask_0, x = mask_to_fp16)[name = string("op_2345_cast_fp16")]; + int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; + tensor var_2346_begin_0 = const()[name = string("op_2346_begin_0"), val = tensor([0, 0])]; + tensor var_2346_end_mask_0 = const()[name = string("op_2346_end_mask_0"), val = tensor([true, false])]; + tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = concat_236, end_mask = var_2346_end_mask_0, x = var_2345_cast_fp16)[name = string("op_2346_cast_fp16")]; + tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2346_cast_fp16)[name = string("qk_63_cast_fp16")]; + tensor var_2349_cast_fp16 = softmax(axis = var_2258, x = qk_63_cast_fp16)[name = string("op_2349_cast_fp16")]; + bool var_2351_transpose_x_0 = const()[name = string("op_2351_transpose_x_0"), val = bool(false)]; + bool var_2351_transpose_y_0 = const()[name = string("op_2351_transpose_y_0"), val = bool(false)]; + tensor v_105_cast_fp16 = transpose(perm = var_2342, x = var_2341_cast_fp16)[name = string("transpose_400")]; + tensor var_2351_cast_fp16 = matmul(transpose_x = var_2351_transpose_x_0, transpose_y = var_2351_transpose_y_0, x = var_2349_cast_fp16, y = v_105_cast_fp16)[name = string("op_2351_cast_fp16")]; + tensor var_2352 = const()[name = string("op_2352"), val = tensor([0, 2, 1, 3])]; + tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 1024])]; + tensor var_2353_cast_fp16 = transpose(perm = var_2352, x = var_2351_cast_fp16)[name = string("transpose_397")]; + tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2353_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_2357_to_fp16 = const()[name = string("op_2357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410855360)))]; + tensor var_2358_to_fp16 = const()[name = string("op_2358_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412952576)))]; + tensor linear_83_cast_fp16 = linear(bias = var_2358_to_fp16, weight = var_2357_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; + tensor var_2365_axes_0 = const()[name = string("op_2365_axes_0"), val = tensor([-1])]; + tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412954688)))]; + tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412956800)))]; + tensor var_2365_cast_fp16 = layer_norm(axes = var_2365_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2365_cast_fp16")]; + tensor var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412958912)))]; + tensor var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415056128)))]; + tensor linear_84_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = var_2365_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; + tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; + tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; + tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; + tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 16, 64])]; + tensor var_2395_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2395_cast_fp16")]; + tensor const_162_to_fp16 = const()[name = string("const_162_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2395_cast_fp16, y = const_162_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2401 = const()[name = string("op_2401"), val = tensor([1, 1500, 16, -1])]; + tensor var_2402_cast_fp16 = reshape(shape = var_2401, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2402_cast_fp16")]; + tensor const_163_to_fp16 = const()[name = string("const_163_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_109_cast_fp16 = mul(x = var_2402_cast_fp16, y = const_163_to_fp16)[name = string("k_109_cast_fp16")]; + tensor var_2408 = const()[name = string("op_2408"), val = tensor([1, 1500, 16, -1])]; + tensor var_2409_cast_fp16 = reshape(shape = var_2408, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2409_cast_fp16")]; + tensor var_2410 = const()[name = string("op_2410"), val = tensor([0, 2, 1, 3])]; + bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; + bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; + tensor transpose_235_perm_0 = const()[name = string("transpose_235_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_236_perm_0 = const()[name = string("transpose_236_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_236 = transpose(perm = transpose_236_perm_0, x = k_109_cast_fp16)[name = string("transpose_394")]; + tensor transpose_235 = transpose(perm = transpose_235_perm_0, x = q_87_cast_fp16)[name = string("transpose_395")]; + tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_235, y = transpose_236)[name = string("qk_65_cast_fp16")]; + tensor var_2414_cast_fp16 = softmax(axis = var_2258, x = qk_65_cast_fp16)[name = string("op_2414_cast_fp16")]; + bool var_2416_transpose_x_0 = const()[name = string("op_2416_transpose_x_0"), val = bool(false)]; + bool var_2416_transpose_y_0 = const()[name = string("op_2416_transpose_y_0"), val = bool(false)]; + tensor v_109_cast_fp16 = transpose(perm = var_2410, x = var_2409_cast_fp16)[name = string("transpose_396")]; + tensor var_2416_cast_fp16 = matmul(transpose_x = var_2416_transpose_x_0, transpose_y = var_2416_transpose_y_0, x = var_2414_cast_fp16, y = v_109_cast_fp16)[name = string("op_2416_cast_fp16")]; + tensor var_2417 = const()[name = string("op_2417"), val = tensor([0, 2, 1, 3])]; + tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 1024])]; + tensor var_2418_cast_fp16 = transpose(perm = var_2417, x = var_2416_cast_fp16)[name = string("transpose_393")]; + tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2418_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_2422_to_fp16 = const()[name = string("op_2422_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415058240)))]; + tensor var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417155456)))]; + tensor linear_85_cast_fp16 = linear(bias = var_2423_to_fp16, weight = var_2422_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_2430_axes_0 = const()[name = string("op_2430_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417157568)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417159680)))]; + tensor var_2430_cast_fp16 = layer_norm(axes = var_2430_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2264_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2430_cast_fp16")]; + tensor var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417161792)))]; + tensor var_2440_to_fp16 = const()[name = string("op_2440_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425550464)))]; + tensor linear_86_cast_fp16 = linear(bias = var_2440_to_fp16, weight = var_2439_to_fp16, x = var_2430_cast_fp16)[name = string("linear_86_cast_fp16")]; + string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; + tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; + tensor var_2445_to_fp16 = const()[name = string("op_2445_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425558720)))]; + tensor var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433947392)))]; + tensor linear_87_cast_fp16 = linear(bias = var_2446_to_fp16, weight = var_2445_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 1024])]; + tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_68)[name = string("k_cache_45_cast_fp16")]; + tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 1024])]; + tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_69)[name = string("v_cache_45_cast_fp16")]; + tensor k_cache_47_begin_0 = const()[name = string("k_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_47_end_0 = const()[name = string("k_cache_47_end_0"), val = tensor([12, 1, 1500, 1024])]; + tensor k_cache_47_end_mask_0 = const()[name = string("k_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_47_squeeze_mask_0 = const()[name = string("k_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_47_cast_fp16 = slice_by_index(begin = k_cache_47_begin_0, end = k_cache_47_end_0, end_mask = k_cache_47_end_mask_0, squeeze_mask = k_cache_47_squeeze_mask_0, x = read_state_2)[name = string("k_cache_47_cast_fp16")]; + tensor v_cache_47_begin_0 = const()[name = string("v_cache_47_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_47_end_0 = const()[name = string("v_cache_47_end_0"), val = tensor([12, 1, 1500, 1024])]; + tensor v_cache_47_end_mask_0 = const()[name = string("v_cache_47_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_47_squeeze_mask_0 = const()[name = string("v_cache_47_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_47_cast_fp16 = slice_by_index(begin = v_cache_47_begin_0, end = v_cache_47_end_0, end_mask = v_cache_47_end_mask_0, squeeze_mask = v_cache_47_squeeze_mask_0, x = read_state_3)[name = string("v_cache_47_cast_fp16")]; + int32 var_2469 = const()[name = string("op_2469"), val = int32(-1)]; + tensor var_2487_axes_0 = const()[name = string("op_2487_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433949504)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433951616)))]; + fp16 var_2475_to_fp16 = const()[name = string("op_2475_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2487_cast_fp16 = layer_norm(axes = var_2487_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2487_cast_fp16")]; + tensor var_2498_to_fp16 = const()[name = string("op_2498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(433953728)))]; + tensor var_2499_to_fp16 = const()[name = string("op_2499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436050944)))]; + tensor linear_88_cast_fp16 = linear(bias = var_2499_to_fp16, weight = var_2498_to_fp16, x = var_2487_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2502_to_fp16 = const()[name = string("op_2502_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436053056)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2502_to_fp16, x = var_2487_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor var_2506_to_fp16 = const()[name = string("op_2506_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438150272)))]; + tensor var_2507_to_fp16 = const()[name = string("op_2507_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440247488)))]; + tensor linear_90_cast_fp16 = linear(bias = var_2507_to_fp16, weight = var_2506_to_fp16, x = var_2487_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_2509_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2509_shape_cast_fp16")]; + int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; + int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; + bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; + string var_2509_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2509_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; + tensor var_2509_shape_cast_fp16_to_uint16 = cast(dtype = var_2509_shape_cast_fp16_to_uint16_dtype_0, x = var_2509_shape_cast_fp16)[name = string("cast_272")]; + uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2509_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; + string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_271")]; + int32 end_step_25 = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step_25")]; + tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step_25)[name = string("expand_dims_179")]; + tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; + tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; + tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; + tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; + int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; + bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; + tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; + tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_68)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = k_cache1)[name = string("coreml_update_state_70")]; + tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_69)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = v_cache1)[name = string("coreml_update_state_71")]; + int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; + int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(1024)]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step_25, concat_252_values2_0))[name = string("concat_252")]; + tensor var_2525_begin_0 = const()[name = string("op_2525_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2525_end_mask_0 = const()[name = string("op_2525_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2525_cast_fp16 = slice_by_index(begin = var_2525_begin_0, end = concat_252, end_mask = var_2525_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2525_cast_fp16")]; + tensor var_2528_begin_0 = const()[name = string("op_2528_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2528_end_mask_0 = const()[name = string("op_2528_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2528_cast_fp16 = slice_by_index(begin = var_2528_begin_0, end = concat_252, end_mask = var_2528_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2528_cast_fp16")]; + tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 16, 64])]; + tensor var_2538_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2538_cast_fp16")]; + tensor const_164_to_fp16 = const()[name = string("const_164_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2538_cast_fp16, y = const_164_to_fp16)[name = string("q_91_cast_fp16")]; + tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 16, 64])]; + tensor var_2545_cast_fp16 = reshape(shape = concat_255x, x = var_2525_cast_fp16)[name = string("op_2545_cast_fp16")]; + tensor const_165_to_fp16 = const()[name = string("const_165_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_2545_cast_fp16, y = const_165_to_fp16)[name = string("k_115_cast_fp16")]; + tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 16, 64])]; + tensor var_2552_cast_fp16 = reshape(shape = concat_256x, x = var_2528_cast_fp16)[name = string("op_2552_cast_fp16")]; + tensor var_2553 = const()[name = string("op_2553"), val = tensor([0, 2, 1, 3])]; + bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; + bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; + tensor transpose_237_perm_0 = const()[name = string("transpose_237_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_238_perm_0 = const()[name = string("transpose_238_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_238 = transpose(perm = transpose_238_perm_0, x = k_115_cast_fp16)[name = string("transpose_390")]; + tensor transpose_237 = transpose(perm = transpose_237_perm_0, x = q_91_cast_fp16)[name = string("transpose_391")]; + tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_237, y = transpose_238)[name = string("qk_67_cast_fp16")]; + int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; + int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; + bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; + tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; + tensor var_2556_begin_0 = const()[name = string("op_2556_begin_0"), val = tensor([0, 0])]; + tensor var_2556_end_mask_0 = const()[name = string("op_2556_end_mask_0"), val = tensor([false, true])]; + tensor var_2556_cast_fp16 = slice_by_index(begin = var_2556_begin_0, end = concat_257, end_mask = var_2556_end_mask_0, x = mask_to_fp16)[name = string("op_2556_cast_fp16")]; + int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; + tensor var_2557_begin_0 = const()[name = string("op_2557_begin_0"), val = tensor([0, 0])]; + tensor var_2557_end_mask_0 = const()[name = string("op_2557_end_mask_0"), val = tensor([true, false])]; + tensor var_2557_cast_fp16 = slice_by_index(begin = var_2557_begin_0, end = concat_258, end_mask = var_2557_end_mask_0, x = var_2556_cast_fp16)[name = string("op_2557_cast_fp16")]; + tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2557_cast_fp16)[name = string("qk_69_cast_fp16")]; + tensor var_2560_cast_fp16 = softmax(axis = var_2469, x = qk_69_cast_fp16)[name = string("op_2560_cast_fp16")]; + bool var_2562_transpose_x_0 = const()[name = string("op_2562_transpose_x_0"), val = bool(false)]; + bool var_2562_transpose_y_0 = const()[name = string("op_2562_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_2553, x = var_2552_cast_fp16)[name = string("transpose_392")]; + tensor var_2562_cast_fp16 = matmul(transpose_x = var_2562_transpose_x_0, transpose_y = var_2562_transpose_y_0, x = var_2560_cast_fp16, y = v_115_cast_fp16)[name = string("op_2562_cast_fp16")]; + tensor var_2563 = const()[name = string("op_2563"), val = tensor([0, 2, 1, 3])]; + tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 1024])]; + tensor var_2564_cast_fp16 = transpose(perm = var_2563, x = var_2562_cast_fp16)[name = string("transpose_389")]; + tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2564_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_2568_to_fp16 = const()[name = string("op_2568_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440249600)))]; + tensor var_2569_to_fp16 = const()[name = string("op_2569_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442346816)))]; + tensor linear_91_cast_fp16 = linear(bias = var_2569_to_fp16, weight = var_2568_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_2576_axes_0 = const()[name = string("op_2576_axes_0"), val = tensor([-1])]; + tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442348928)))]; + tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442351040)))]; + tensor var_2576_cast_fp16 = layer_norm(axes = var_2576_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2576_cast_fp16")]; + tensor var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442353152)))]; + tensor var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444450368)))]; + tensor linear_92_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = var_2576_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; + tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; + tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; + tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; + tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_47_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 16, 64])]; + tensor var_2606_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2606_cast_fp16")]; + tensor const_166_to_fp16 = const()[name = string("const_166_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_95_cast_fp16 = mul(x = var_2606_cast_fp16, y = const_166_to_fp16)[name = string("q_95_cast_fp16")]; + tensor var_2612 = const()[name = string("op_2612"), val = tensor([1, 1500, 16, -1])]; + tensor var_2613_cast_fp16 = reshape(shape = var_2612, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2613_cast_fp16")]; + tensor const_167_to_fp16 = const()[name = string("const_167_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_119_cast_fp16 = mul(x = var_2613_cast_fp16, y = const_167_to_fp16)[name = string("k_119_cast_fp16")]; + tensor var_2619 = const()[name = string("op_2619"), val = tensor([1, 1500, 16, -1])]; + tensor var_2620_cast_fp16 = reshape(shape = var_2619, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2620_cast_fp16")]; + tensor var_2621 = const()[name = string("op_2621"), val = tensor([0, 2, 1, 3])]; + bool qk_71_transpose_x_0 = const()[name = string("qk_71_transpose_x_0"), val = bool(false)]; + bool qk_71_transpose_y_0 = const()[name = string("qk_71_transpose_y_0"), val = bool(false)]; + tensor transpose_239_perm_0 = const()[name = string("transpose_239_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_240_perm_0 = const()[name = string("transpose_240_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_240 = transpose(perm = transpose_240_perm_0, x = k_119_cast_fp16)[name = string("transpose_386")]; + tensor transpose_239 = transpose(perm = transpose_239_perm_0, x = q_95_cast_fp16)[name = string("transpose_387")]; + tensor qk_71_cast_fp16 = matmul(transpose_x = qk_71_transpose_x_0, transpose_y = qk_71_transpose_y_0, x = transpose_239, y = transpose_240)[name = string("qk_71_cast_fp16")]; + tensor var_2625_cast_fp16 = softmax(axis = var_2469, x = qk_71_cast_fp16)[name = string("op_2625_cast_fp16")]; + bool var_2627_transpose_x_0 = const()[name = string("op_2627_transpose_x_0"), val = bool(false)]; + bool var_2627_transpose_y_0 = const()[name = string("op_2627_transpose_y_0"), val = bool(false)]; + tensor v_119_cast_fp16 = transpose(perm = var_2621, x = var_2620_cast_fp16)[name = string("transpose_388")]; + tensor var_2627_cast_fp16 = matmul(transpose_x = var_2627_transpose_x_0, transpose_y = var_2627_transpose_y_0, x = var_2625_cast_fp16, y = v_119_cast_fp16)[name = string("op_2627_cast_fp16")]; + tensor var_2628 = const()[name = string("op_2628"), val = tensor([0, 2, 1, 3])]; + tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 1024])]; + tensor var_2629_cast_fp16 = transpose(perm = var_2628, x = var_2627_cast_fp16)[name = string("transpose_385")]; + tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2629_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_2633_to_fp16 = const()[name = string("op_2633_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444452480)))]; + tensor var_2634_to_fp16 = const()[name = string("op_2634_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446549696)))]; + tensor linear_93_cast_fp16 = linear(bias = var_2634_to_fp16, weight = var_2633_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_2641_axes_0 = const()[name = string("op_2641_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446551808)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446553920)))]; + tensor var_2641_cast_fp16 = layer_norm(axes = var_2641_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2475_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2641_cast_fp16")]; + tensor var_2650_to_fp16 = const()[name = string("op_2650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446556032)))]; + tensor var_2651_to_fp16 = const()[name = string("op_2651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454944704)))]; + tensor linear_94_cast_fp16 = linear(bias = var_2651_to_fp16, weight = var_2650_to_fp16, x = var_2641_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; + tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2656_to_fp16 = const()[name = string("op_2656_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454952960)))]; + tensor var_2657_to_fp16 = const()[name = string("op_2657_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463341632)))]; + tensor linear_95_cast_fp16 = linear(bias = var_2657_to_fp16, weight = var_2656_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor k_cache_49_begin_0 = const()[name = string("k_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_49_end_0 = const()[name = string("k_cache_49_end_0"), val = tensor([13, 1, 448, 1024])]; + tensor k_cache_49_end_mask_0 = const()[name = string("k_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_49_squeeze_mask_0 = const()[name = string("k_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_49_cast_fp16 = slice_by_index(begin = k_cache_49_begin_0, end = k_cache_49_end_0, end_mask = k_cache_49_end_mask_0, squeeze_mask = k_cache_49_squeeze_mask_0, x = coreml_update_state_70)[name = string("k_cache_49_cast_fp16")]; + tensor v_cache_49_begin_0 = const()[name = string("v_cache_49_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_49_end_0 = const()[name = string("v_cache_49_end_0"), val = tensor([13, 1, 448, 1024])]; + tensor v_cache_49_end_mask_0 = const()[name = string("v_cache_49_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_49_squeeze_mask_0 = const()[name = string("v_cache_49_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_49_cast_fp16 = slice_by_index(begin = v_cache_49_begin_0, end = v_cache_49_end_0, end_mask = v_cache_49_end_mask_0, squeeze_mask = v_cache_49_squeeze_mask_0, x = coreml_update_state_71)[name = string("v_cache_49_cast_fp16")]; + tensor k_cache_51_begin_0 = const()[name = string("k_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor k_cache_51_end_0 = const()[name = string("k_cache_51_end_0"), val = tensor([13, 1, 1500, 1024])]; + tensor k_cache_51_end_mask_0 = const()[name = string("k_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_51_squeeze_mask_0 = const()[name = string("k_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_51_cast_fp16 = slice_by_index(begin = k_cache_51_begin_0, end = k_cache_51_end_0, end_mask = k_cache_51_end_mask_0, squeeze_mask = k_cache_51_squeeze_mask_0, x = read_state_2)[name = string("k_cache_51_cast_fp16")]; + tensor v_cache_51_begin_0 = const()[name = string("v_cache_51_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor v_cache_51_end_0 = const()[name = string("v_cache_51_end_0"), val = tensor([13, 1, 1500, 1024])]; + tensor v_cache_51_end_mask_0 = const()[name = string("v_cache_51_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_51_squeeze_mask_0 = const()[name = string("v_cache_51_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_51_cast_fp16 = slice_by_index(begin = v_cache_51_begin_0, end = v_cache_51_end_0, end_mask = v_cache_51_end_mask_0, squeeze_mask = v_cache_51_squeeze_mask_0, x = read_state_3)[name = string("v_cache_51_cast_fp16")]; + int32 var_2680 = const()[name = string("op_2680"), val = int32(-1)]; + tensor var_2698_axes_0 = const()[name = string("op_2698_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463343744)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463345856)))]; + fp16 var_2686_to_fp16 = const()[name = string("op_2686_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2698_cast_fp16 = layer_norm(axes = var_2698_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2698_cast_fp16")]; + tensor var_2709_to_fp16 = const()[name = string("op_2709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463347968)))]; + tensor var_2710_to_fp16 = const()[name = string("op_2710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465445184)))]; + tensor linear_96_cast_fp16 = linear(bias = var_2710_to_fp16, weight = var_2709_to_fp16, x = var_2698_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_2713_to_fp16 = const()[name = string("op_2713_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465447296)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2713_to_fp16, x = var_2698_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_2717_to_fp16 = const()[name = string("op_2717_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467544512)))]; + tensor var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469641728)))]; + tensor linear_98_cast_fp16 = linear(bias = var_2718_to_fp16, weight = var_2717_to_fp16, x = var_2698_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_2720_shape_cast_fp16 = shape(x = linear_96_cast_fp16)[name = string("op_2720_shape_cast_fp16")]; + int32 gather_146_axis_0 = const()[name = string("gather_146_axis_0"), val = int32(0)]; + int32 gather_146_batch_dims_0 = const()[name = string("gather_146_batch_dims_0"), val = int32(0)]; + bool gather_146_validate_indices_0 = const()[name = string("gather_146_validate_indices_0"), val = bool(false)]; + string var_2720_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2720_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_146_to_uint16 = const()[name = string("select_146_to_uint16"), val = uint16(1)]; + tensor var_2720_shape_cast_fp16_to_uint16 = cast(dtype = var_2720_shape_cast_fp16_to_uint16_dtype_0, x = var_2720_shape_cast_fp16)[name = string("cast_270")]; + uint16 gather_146_cast_uint16 = gather(axis = gather_146_axis_0, batch_dims = gather_146_batch_dims_0, indices = select_146_to_uint16, validate_indices = gather_146_validate_indices_0, x = var_2720_shape_cast_fp16_to_uint16)[name = string("gather_146_cast_uint16")]; + string gather_146_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_146_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_146_cast_uint16_to_int32 = cast(dtype = gather_146_cast_uint16_to_int32_dtype_0, x = gather_146_cast_uint16)[name = string("cast_269")]; + int32 end_step_27 = add(x = offset, y = gather_146_cast_uint16_to_int32)[name = string("end_step_27")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_194 = const()[name = string("expand_dims_194"), val = tensor([0])]; + tensor expand_dims_195_axes_0 = const()[name = string("expand_dims_195_axes_0"), val = tensor([0])]; + tensor expand_dims_195 = expand_dims(axes = expand_dims_195_axes_0, x = end_step_27)[name = string("expand_dims_195")]; + tensor concat_268_values0_0 = const()[name = string("concat_268_values0_0"), val = tensor([12])]; + int32 concat_268_axis_0 = const()[name = string("concat_268_axis_0"), val = int32(0)]; + bool concat_268_interleave_0 = const()[name = string("concat_268_interleave_0"), val = bool(false)]; + tensor concat_268 = concat(axis = concat_268_axis_0, interleave = concat_268_interleave_0, values = (concat_268_values0_0, expand_dims_192, expand_dims_1, expand_dims_194))[name = string("concat_268")]; + tensor concat_269_values0_0 = const()[name = string("concat_269_values0_0"), val = tensor([0])]; + tensor concat_269_values1_0 = const()[name = string("concat_269_values1_0"), val = tensor([0])]; + tensor concat_269_values3_0 = const()[name = string("concat_269_values3_0"), val = tensor([0])]; + int32 concat_269_axis_0 = const()[name = string("concat_269_axis_0"), val = int32(0)]; + bool concat_269_interleave_0 = const()[name = string("concat_269_interleave_0"), val = bool(false)]; + tensor concat_269 = concat(axis = concat_269_axis_0, interleave = concat_269_interleave_0, values = (concat_269_values0_0, concat_269_values1_0, expand_dims_195, concat_269_values3_0))[name = string("concat_269")]; + tensor k_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = k_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = k_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_13_stride_0, update = linear_97_cast_fp16, x = coreml_update_state_70)[name = string("k_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_13_cast_fp16, input = k_cache1)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = k_cache1)[name = string("coreml_update_state_72")]; + tensor v_cache1_internal_tensor_assign_13_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_13_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_268, begin_mask = v_cache1_internal_tensor_assign_13_begin_mask_0, end = concat_269, end_mask = v_cache1_internal_tensor_assign_13_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_13_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_13_stride_0, update = linear_98_cast_fp16, x = coreml_update_state_71)[name = string("v_cache1_internal_tensor_assign_13_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_13_cast_fp16, input = v_cache1)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = v_cache1)[name = string("coreml_update_state_73")]; + int32 concat_274_values0_0 = const()[name = string("concat_274_values0_0"), val = int32(1)]; + int32 concat_274_values2_0 = const()[name = string("concat_274_values2_0"), val = int32(1024)]; + int32 concat_274_axis_0 = const()[name = string("concat_274_axis_0"), val = int32(0)]; + bool concat_274_interleave_0 = const()[name = string("concat_274_interleave_0"), val = bool(false)]; + tensor concat_274 = concat(axis = concat_274_axis_0, interleave = concat_274_interleave_0, values = (concat_274_values0_0, end_step_27, concat_274_values2_0))[name = string("concat_274")]; + tensor var_2736_begin_0 = const()[name = string("op_2736_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2736_end_mask_0 = const()[name = string("op_2736_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2736_cast_fp16 = slice_by_index(begin = var_2736_begin_0, end = concat_274, end_mask = var_2736_end_mask_0, x = k_cache_49_cast_fp16)[name = string("op_2736_cast_fp16")]; + tensor var_2739_begin_0 = const()[name = string("op_2739_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2739_end_mask_0 = const()[name = string("op_2739_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2739_cast_fp16 = slice_by_index(begin = var_2739_begin_0, end = concat_274, end_mask = var_2739_end_mask_0, x = v_cache_49_cast_fp16)[name = string("op_2739_cast_fp16")]; + tensor concat_276x = const()[name = string("concat_276x"), val = tensor([1, -1, 16, 64])]; + tensor var_2749_cast_fp16 = reshape(shape = concat_276x, x = linear_96_cast_fp16)[name = string("op_2749_cast_fp16")]; + tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_99_cast_fp16 = mul(x = var_2749_cast_fp16, y = const_168_to_fp16)[name = string("q_99_cast_fp16")]; + tensor concat_277x = const()[name = string("concat_277x"), val = tensor([1, -1, 16, 64])]; + tensor var_2756_cast_fp16 = reshape(shape = concat_277x, x = var_2736_cast_fp16)[name = string("op_2756_cast_fp16")]; + tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_125_cast_fp16 = mul(x = var_2756_cast_fp16, y = const_169_to_fp16)[name = string("k_125_cast_fp16")]; + tensor concat_278x = const()[name = string("concat_278x"), val = tensor([1, -1, 16, 64])]; + tensor var_2763_cast_fp16 = reshape(shape = concat_278x, x = var_2739_cast_fp16)[name = string("op_2763_cast_fp16")]; + tensor var_2764 = const()[name = string("op_2764"), val = tensor([0, 2, 1, 3])]; + bool qk_73_transpose_x_0 = const()[name = string("qk_73_transpose_x_0"), val = bool(false)]; + bool qk_73_transpose_y_0 = const()[name = string("qk_73_transpose_y_0"), val = bool(false)]; + tensor transpose_241_perm_0 = const()[name = string("transpose_241_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_242_perm_0 = const()[name = string("transpose_242_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_242 = transpose(perm = transpose_242_perm_0, x = k_125_cast_fp16)[name = string("transpose_382")]; + tensor transpose_241 = transpose(perm = transpose_241_perm_0, x = q_99_cast_fp16)[name = string("transpose_383")]; + tensor qk_73_cast_fp16 = matmul(transpose_x = qk_73_transpose_x_0, transpose_y = qk_73_transpose_y_0, x = transpose_241, y = transpose_242)[name = string("qk_73_cast_fp16")]; + int32 concat_279_values1_0 = const()[name = string("concat_279_values1_0"), val = int32(448)]; + int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; + bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_146_cast_uint16_to_int32, concat_279_values1_0))[name = string("concat_279")]; + tensor var_2767_begin_0 = const()[name = string("op_2767_begin_0"), val = tensor([0, 0])]; + tensor var_2767_end_mask_0 = const()[name = string("op_2767_end_mask_0"), val = tensor([false, true])]; + tensor var_2767_cast_fp16 = slice_by_index(begin = var_2767_begin_0, end = concat_279, end_mask = var_2767_end_mask_0, x = mask_to_fp16)[name = string("op_2767_cast_fp16")]; + int32 concat_280_values0_0 = const()[name = string("concat_280_values0_0"), val = int32(0)]; + int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; + bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; + tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (concat_280_values0_0, gather_146_cast_uint16_to_int32))[name = string("concat_280")]; + tensor var_2768_begin_0 = const()[name = string("op_2768_begin_0"), val = tensor([0, 0])]; + tensor var_2768_end_mask_0 = const()[name = string("op_2768_end_mask_0"), val = tensor([true, false])]; + tensor var_2768_cast_fp16 = slice_by_index(begin = var_2768_begin_0, end = concat_280, end_mask = var_2768_end_mask_0, x = var_2767_cast_fp16)[name = string("op_2768_cast_fp16")]; + tensor qk_75_cast_fp16 = add(x = qk_73_cast_fp16, y = var_2768_cast_fp16)[name = string("qk_75_cast_fp16")]; + tensor var_2771_cast_fp16 = softmax(axis = var_2680, x = qk_75_cast_fp16)[name = string("op_2771_cast_fp16")]; + bool var_2773_transpose_x_0 = const()[name = string("op_2773_transpose_x_0"), val = bool(false)]; + bool var_2773_transpose_y_0 = const()[name = string("op_2773_transpose_y_0"), val = bool(false)]; + tensor v_125_cast_fp16 = transpose(perm = var_2764, x = var_2763_cast_fp16)[name = string("transpose_384")]; + tensor var_2773_cast_fp16 = matmul(transpose_x = var_2773_transpose_x_0, transpose_y = var_2773_transpose_y_0, x = var_2771_cast_fp16, y = v_125_cast_fp16)[name = string("op_2773_cast_fp16")]; + tensor var_2774 = const()[name = string("op_2774"), val = tensor([0, 2, 1, 3])]; + tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, -1, 1024])]; + tensor var_2775_cast_fp16 = transpose(perm = var_2774, x = var_2773_cast_fp16)[name = string("transpose_381")]; + tensor x_223_cast_fp16 = reshape(shape = concat_281x, x = var_2775_cast_fp16)[name = string("x_223_cast_fp16")]; + tensor var_2779_to_fp16 = const()[name = string("op_2779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469643840)))]; + tensor var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471741056)))]; + tensor linear_99_cast_fp16 = linear(bias = var_2780_to_fp16, weight = var_2779_to_fp16, x = x_223_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_225_cast_fp16 = add(x = x_219_cast_fp16, y = linear_99_cast_fp16)[name = string("x_225_cast_fp16")]; + tensor var_2787_axes_0 = const()[name = string("op_2787_axes_0"), val = tensor([-1])]; + tensor blocks_12_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471743168)))]; + tensor blocks_12_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471745280)))]; + tensor var_2787_cast_fp16 = layer_norm(axes = var_2787_axes_0, beta = blocks_12_cross_attn_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_cross_attn_ln_weight_to_fp16, x = x_225_cast_fp16)[name = string("op_2787_cast_fp16")]; + tensor var_2796_to_fp16 = const()[name = string("op_2796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471747392)))]; + tensor var_2797_to_fp16 = const()[name = string("op_2797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473844608)))]; + tensor linear_100_cast_fp16 = linear(bias = var_2797_to_fp16, weight = var_2796_to_fp16, x = var_2787_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_282 = const()[name = string("concat_282"), val = tensor([0, 0, 0])]; + tensor concat_283 = const()[name = string("concat_283"), val = tensor([0, 1500, 0])]; + tensor k_127_internal_tensor_assign_1_stride_0 = const()[name = string("k_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_282, begin_mask = k_127_internal_tensor_assign_1_begin_mask_0, end = concat_283, end_mask = k_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_127_internal_tensor_assign_1_squeeze_mask_0, stride = k_127_internal_tensor_assign_1_stride_0, update = k_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("k_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_284 = const()[name = string("concat_284"), val = tensor([0, 0, 0])]; + tensor concat_285 = const()[name = string("concat_285"), val = tensor([0, 1500, 0])]; + tensor v_127_internal_tensor_assign_1_stride_0 = const()[name = string("v_127_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_127_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_127_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_127_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_127_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_284, begin_mask = v_127_internal_tensor_assign_1_begin_mask_0, end = concat_285, end_mask = v_127_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_127_internal_tensor_assign_1_squeeze_mask_0, stride = v_127_internal_tensor_assign_1_stride_0, update = v_cache_51_cast_fp16, x = k_7_to_fp16)[name = string("v_127_internal_tensor_assign_1_cast_fp16")]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 16, 64])]; + tensor var_2817_cast_fp16 = reshape(shape = concat_286x, x = linear_100_cast_fp16)[name = string("op_2817_cast_fp16")]; + tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_103_cast_fp16 = mul(x = var_2817_cast_fp16, y = const_170_to_fp16)[name = string("q_103_cast_fp16")]; + tensor var_2823 = const()[name = string("op_2823"), val = tensor([1, 1500, 16, -1])]; + tensor var_2824_cast_fp16 = reshape(shape = var_2823, x = k_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2824_cast_fp16")]; + tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_129_cast_fp16 = mul(x = var_2824_cast_fp16, y = const_171_to_fp16)[name = string("k_129_cast_fp16")]; + tensor var_2830 = const()[name = string("op_2830"), val = tensor([1, 1500, 16, -1])]; + tensor var_2831_cast_fp16 = reshape(shape = var_2830, x = v_127_internal_tensor_assign_1_cast_fp16)[name = string("op_2831_cast_fp16")]; + tensor var_2832 = const()[name = string("op_2832"), val = tensor([0, 2, 1, 3])]; + bool qk_77_transpose_x_0 = const()[name = string("qk_77_transpose_x_0"), val = bool(false)]; + bool qk_77_transpose_y_0 = const()[name = string("qk_77_transpose_y_0"), val = bool(false)]; + tensor transpose_243_perm_0 = const()[name = string("transpose_243_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_244_perm_0 = const()[name = string("transpose_244_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_244 = transpose(perm = transpose_244_perm_0, x = k_129_cast_fp16)[name = string("transpose_378")]; + tensor transpose_243 = transpose(perm = transpose_243_perm_0, x = q_103_cast_fp16)[name = string("transpose_379")]; + tensor qk_77_cast_fp16 = matmul(transpose_x = qk_77_transpose_x_0, transpose_y = qk_77_transpose_y_0, x = transpose_243, y = transpose_244)[name = string("qk_77_cast_fp16")]; + tensor var_2836_cast_fp16 = softmax(axis = var_2680, x = qk_77_cast_fp16)[name = string("op_2836_cast_fp16")]; + bool var_2838_transpose_x_0 = const()[name = string("op_2838_transpose_x_0"), val = bool(false)]; + bool var_2838_transpose_y_0 = const()[name = string("op_2838_transpose_y_0"), val = bool(false)]; + tensor v_129_cast_fp16 = transpose(perm = var_2832, x = var_2831_cast_fp16)[name = string("transpose_380")]; + tensor var_2838_cast_fp16 = matmul(transpose_x = var_2838_transpose_x_0, transpose_y = var_2838_transpose_y_0, x = var_2836_cast_fp16, y = v_129_cast_fp16)[name = string("op_2838_cast_fp16")]; + tensor var_2839 = const()[name = string("op_2839"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 1024])]; + tensor var_2840_cast_fp16 = transpose(perm = var_2839, x = var_2838_cast_fp16)[name = string("transpose_377")]; + tensor x_229_cast_fp16 = reshape(shape = concat_287x, x = var_2840_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2844_to_fp16 = const()[name = string("op_2844_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473846720)))]; + tensor var_2845_to_fp16 = const()[name = string("op_2845_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475943936)))]; + tensor linear_101_cast_fp16 = linear(bias = var_2845_to_fp16, weight = var_2844_to_fp16, x = x_229_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_231_cast_fp16 = add(x = x_225_cast_fp16, y = linear_101_cast_fp16)[name = string("x_231_cast_fp16")]; + tensor var_2852_axes_0 = const()[name = string("op_2852_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475946048)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475948160)))]; + tensor var_2852_cast_fp16 = layer_norm(axes = var_2852_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_2686_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_231_cast_fp16)[name = string("op_2852_cast_fp16")]; + tensor var_2861_to_fp16 = const()[name = string("op_2861_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(475950272)))]; + tensor var_2862_to_fp16 = const()[name = string("op_2862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484338944)))]; + tensor linear_102_cast_fp16 = linear(bias = var_2862_to_fp16, weight = var_2861_to_fp16, x = var_2852_cast_fp16)[name = string("linear_102_cast_fp16")]; + string x_235_mode_0 = const()[name = string("x_235_mode_0"), val = string("EXACT")]; + tensor x_235_cast_fp16 = gelu(mode = x_235_mode_0, x = linear_102_cast_fp16)[name = string("x_235_cast_fp16")]; + tensor var_2867_to_fp16 = const()[name = string("op_2867_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(484347200)))]; + tensor var_2868_to_fp16 = const()[name = string("op_2868_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492735872)))]; + tensor linear_103_cast_fp16 = linear(bias = var_2868_to_fp16, weight = var_2867_to_fp16, x = x_235_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor x_237_cast_fp16 = add(x = x_231_cast_fp16, y = linear_103_cast_fp16)[name = string("x_237_cast_fp16")]; + tensor k_cache_53_begin_0 = const()[name = string("k_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_53_end_0 = const()[name = string("k_cache_53_end_0"), val = tensor([14, 1, 448, 1024])]; + tensor k_cache_53_end_mask_0 = const()[name = string("k_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_53_squeeze_mask_0 = const()[name = string("k_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_53_cast_fp16 = slice_by_index(begin = k_cache_53_begin_0, end = k_cache_53_end_0, end_mask = k_cache_53_end_mask_0, squeeze_mask = k_cache_53_squeeze_mask_0, x = coreml_update_state_72)[name = string("k_cache_53_cast_fp16")]; + tensor v_cache_53_begin_0 = const()[name = string("v_cache_53_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_53_end_0 = const()[name = string("v_cache_53_end_0"), val = tensor([14, 1, 448, 1024])]; + tensor v_cache_53_end_mask_0 = const()[name = string("v_cache_53_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_53_squeeze_mask_0 = const()[name = string("v_cache_53_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_53_cast_fp16 = slice_by_index(begin = v_cache_53_begin_0, end = v_cache_53_end_0, end_mask = v_cache_53_end_mask_0, squeeze_mask = v_cache_53_squeeze_mask_0, x = coreml_update_state_73)[name = string("v_cache_53_cast_fp16")]; + tensor k_cache_55_begin_0 = const()[name = string("k_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor k_cache_55_end_0 = const()[name = string("k_cache_55_end_0"), val = tensor([14, 1, 1500, 1024])]; + tensor k_cache_55_end_mask_0 = const()[name = string("k_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_55_squeeze_mask_0 = const()[name = string("k_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_55_cast_fp16 = slice_by_index(begin = k_cache_55_begin_0, end = k_cache_55_end_0, end_mask = k_cache_55_end_mask_0, squeeze_mask = k_cache_55_squeeze_mask_0, x = read_state_2)[name = string("k_cache_55_cast_fp16")]; + tensor v_cache_55_begin_0 = const()[name = string("v_cache_55_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor v_cache_55_end_0 = const()[name = string("v_cache_55_end_0"), val = tensor([14, 1, 1500, 1024])]; + tensor v_cache_55_end_mask_0 = const()[name = string("v_cache_55_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_55_squeeze_mask_0 = const()[name = string("v_cache_55_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_55_cast_fp16 = slice_by_index(begin = v_cache_55_begin_0, end = v_cache_55_end_0, end_mask = v_cache_55_end_mask_0, squeeze_mask = v_cache_55_squeeze_mask_0, x = read_state_3)[name = string("v_cache_55_cast_fp16")]; + int32 var_2891 = const()[name = string("op_2891"), val = int32(-1)]; + tensor var_2909_axes_0 = const()[name = string("op_2909_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492737984)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492740096)))]; + fp16 var_2897_to_fp16 = const()[name = string("op_2897_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2909_cast_fp16 = layer_norm(axes = var_2909_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_237_cast_fp16)[name = string("op_2909_cast_fp16")]; + tensor var_2920_to_fp16 = const()[name = string("op_2920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492742208)))]; + tensor var_2921_to_fp16 = const()[name = string("op_2921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494839424)))]; + tensor linear_104_cast_fp16 = linear(bias = var_2921_to_fp16, weight = var_2920_to_fp16, x = var_2909_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_2924_to_fp16 = const()[name = string("op_2924_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494841536)))]; + tensor linear_105_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2924_to_fp16, x = var_2909_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor var_2928_to_fp16 = const()[name = string("op_2928_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496938752)))]; + tensor var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499035968)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2929_to_fp16, weight = var_2928_to_fp16, x = var_2909_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor var_2931_shape_cast_fp16 = shape(x = linear_104_cast_fp16)[name = string("op_2931_shape_cast_fp16")]; + int32 gather_158_axis_0 = const()[name = string("gather_158_axis_0"), val = int32(0)]; + int32 gather_158_batch_dims_0 = const()[name = string("gather_158_batch_dims_0"), val = int32(0)]; + bool gather_158_validate_indices_0 = const()[name = string("gather_158_validate_indices_0"), val = bool(false)]; + string var_2931_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2931_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_158_to_uint16 = const()[name = string("select_158_to_uint16"), val = uint16(1)]; + tensor var_2931_shape_cast_fp16_to_uint16 = cast(dtype = var_2931_shape_cast_fp16_to_uint16_dtype_0, x = var_2931_shape_cast_fp16)[name = string("cast_268")]; + uint16 gather_158_cast_uint16 = gather(axis = gather_158_axis_0, batch_dims = gather_158_batch_dims_0, indices = select_158_to_uint16, validate_indices = gather_158_validate_indices_0, x = var_2931_shape_cast_fp16_to_uint16)[name = string("gather_158_cast_uint16")]; + string gather_158_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_158_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_158_cast_uint16_to_int32 = cast(dtype = gather_158_cast_uint16_to_int32_dtype_0, x = gather_158_cast_uint16)[name = string("cast_267")]; + int32 end_step_29 = add(x = offset, y = gather_158_cast_uint16_to_int32)[name = string("end_step_29")]; + tensor expand_dims_208 = const()[name = string("expand_dims_208"), val = tensor([0])]; + tensor expand_dims_210 = const()[name = string("expand_dims_210"), val = tensor([0])]; + tensor expand_dims_211_axes_0 = const()[name = string("expand_dims_211_axes_0"), val = tensor([0])]; + tensor expand_dims_211 = expand_dims(axes = expand_dims_211_axes_0, x = end_step_29)[name = string("expand_dims_211")]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([13])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_208, expand_dims_1, expand_dims_210))[name = string("concat_290")]; + tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([0])]; + tensor concat_291_values1_0 = const()[name = string("concat_291_values1_0"), val = tensor([0])]; + tensor concat_291_values3_0 = const()[name = string("concat_291_values3_0"), val = tensor([0])]; + int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; + bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; + tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, concat_291_values1_0, expand_dims_211, concat_291_values3_0))[name = string("concat_291")]; + tensor k_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = k_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = k_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_14_stride_0, update = linear_105_cast_fp16, x = coreml_update_state_72)[name = string("k_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_14_cast_fp16, input = k_cache1)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = k_cache1)[name = string("coreml_update_state_74")]; + tensor v_cache1_internal_tensor_assign_14_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_14_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_290, begin_mask = v_cache1_internal_tensor_assign_14_begin_mask_0, end = concat_291, end_mask = v_cache1_internal_tensor_assign_14_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_14_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_14_stride_0, update = linear_106_cast_fp16, x = coreml_update_state_73)[name = string("v_cache1_internal_tensor_assign_14_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_14_cast_fp16, input = v_cache1)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = v_cache1)[name = string("coreml_update_state_75")]; + int32 concat_296_values0_0 = const()[name = string("concat_296_values0_0"), val = int32(1)]; + int32 concat_296_values2_0 = const()[name = string("concat_296_values2_0"), val = int32(1024)]; + int32 concat_296_axis_0 = const()[name = string("concat_296_axis_0"), val = int32(0)]; + bool concat_296_interleave_0 = const()[name = string("concat_296_interleave_0"), val = bool(false)]; + tensor concat_296 = concat(axis = concat_296_axis_0, interleave = concat_296_interleave_0, values = (concat_296_values0_0, end_step_29, concat_296_values2_0))[name = string("concat_296")]; + tensor var_2947_begin_0 = const()[name = string("op_2947_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2947_end_mask_0 = const()[name = string("op_2947_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2947_cast_fp16 = slice_by_index(begin = var_2947_begin_0, end = concat_296, end_mask = var_2947_end_mask_0, x = k_cache_53_cast_fp16)[name = string("op_2947_cast_fp16")]; + tensor var_2950_begin_0 = const()[name = string("op_2950_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2950_end_mask_0 = const()[name = string("op_2950_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2950_cast_fp16 = slice_by_index(begin = var_2950_begin_0, end = concat_296, end_mask = var_2950_end_mask_0, x = v_cache_53_cast_fp16)[name = string("op_2950_cast_fp16")]; + tensor concat_298x = const()[name = string("concat_298x"), val = tensor([1, -1, 16, 64])]; + tensor var_2960_cast_fp16 = reshape(shape = concat_298x, x = linear_104_cast_fp16)[name = string("op_2960_cast_fp16")]; + tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_107_cast_fp16 = mul(x = var_2960_cast_fp16, y = const_172_to_fp16)[name = string("q_107_cast_fp16")]; + tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, -1, 16, 64])]; + tensor var_2967_cast_fp16 = reshape(shape = concat_299x, x = var_2947_cast_fp16)[name = string("op_2967_cast_fp16")]; + tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_135_cast_fp16 = mul(x = var_2967_cast_fp16, y = const_173_to_fp16)[name = string("k_135_cast_fp16")]; + tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, -1, 16, 64])]; + tensor var_2974_cast_fp16 = reshape(shape = concat_300x, x = var_2950_cast_fp16)[name = string("op_2974_cast_fp16")]; + tensor var_2975 = const()[name = string("op_2975"), val = tensor([0, 2, 1, 3])]; + bool qk_79_transpose_x_0 = const()[name = string("qk_79_transpose_x_0"), val = bool(false)]; + bool qk_79_transpose_y_0 = const()[name = string("qk_79_transpose_y_0"), val = bool(false)]; + tensor transpose_245_perm_0 = const()[name = string("transpose_245_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_246_perm_0 = const()[name = string("transpose_246_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_246 = transpose(perm = transpose_246_perm_0, x = k_135_cast_fp16)[name = string("transpose_374")]; + tensor transpose_245 = transpose(perm = transpose_245_perm_0, x = q_107_cast_fp16)[name = string("transpose_375")]; + tensor qk_79_cast_fp16 = matmul(transpose_x = qk_79_transpose_x_0, transpose_y = qk_79_transpose_y_0, x = transpose_245, y = transpose_246)[name = string("qk_79_cast_fp16")]; + int32 concat_301_values1_0 = const()[name = string("concat_301_values1_0"), val = int32(448)]; + int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; + bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; + tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_158_cast_uint16_to_int32, concat_301_values1_0))[name = string("concat_301")]; + tensor var_2978_begin_0 = const()[name = string("op_2978_begin_0"), val = tensor([0, 0])]; + tensor var_2978_end_mask_0 = const()[name = string("op_2978_end_mask_0"), val = tensor([false, true])]; + tensor var_2978_cast_fp16 = slice_by_index(begin = var_2978_begin_0, end = concat_301, end_mask = var_2978_end_mask_0, x = mask_to_fp16)[name = string("op_2978_cast_fp16")]; + int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(0)]; + int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; + bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; + tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, gather_158_cast_uint16_to_int32))[name = string("concat_302")]; + tensor var_2979_begin_0 = const()[name = string("op_2979_begin_0"), val = tensor([0, 0])]; + tensor var_2979_end_mask_0 = const()[name = string("op_2979_end_mask_0"), val = tensor([true, false])]; + tensor var_2979_cast_fp16 = slice_by_index(begin = var_2979_begin_0, end = concat_302, end_mask = var_2979_end_mask_0, x = var_2978_cast_fp16)[name = string("op_2979_cast_fp16")]; + tensor qk_81_cast_fp16 = add(x = qk_79_cast_fp16, y = var_2979_cast_fp16)[name = string("qk_81_cast_fp16")]; + tensor var_2982_cast_fp16 = softmax(axis = var_2891, x = qk_81_cast_fp16)[name = string("op_2982_cast_fp16")]; + bool var_2984_transpose_x_0 = const()[name = string("op_2984_transpose_x_0"), val = bool(false)]; + bool var_2984_transpose_y_0 = const()[name = string("op_2984_transpose_y_0"), val = bool(false)]; + tensor v_135_cast_fp16 = transpose(perm = var_2975, x = var_2974_cast_fp16)[name = string("transpose_376")]; + tensor var_2984_cast_fp16 = matmul(transpose_x = var_2984_transpose_x_0, transpose_y = var_2984_transpose_y_0, x = var_2982_cast_fp16, y = v_135_cast_fp16)[name = string("op_2984_cast_fp16")]; + tensor var_2985 = const()[name = string("op_2985"), val = tensor([0, 2, 1, 3])]; + tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 1024])]; + tensor var_2986_cast_fp16 = transpose(perm = var_2985, x = var_2984_cast_fp16)[name = string("transpose_373")]; + tensor x_241_cast_fp16 = reshape(shape = concat_303x, x = var_2986_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2990_to_fp16 = const()[name = string("op_2990_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499038080)))]; + tensor var_2991_to_fp16 = const()[name = string("op_2991_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501135296)))]; + tensor linear_107_cast_fp16 = linear(bias = var_2991_to_fp16, weight = var_2990_to_fp16, x = x_241_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_243_cast_fp16 = add(x = x_237_cast_fp16, y = linear_107_cast_fp16)[name = string("x_243_cast_fp16")]; + tensor var_2998_axes_0 = const()[name = string("op_2998_axes_0"), val = tensor([-1])]; + tensor blocks_13_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501137408)))]; + tensor blocks_13_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501139520)))]; + tensor var_2998_cast_fp16 = layer_norm(axes = var_2998_axes_0, beta = blocks_13_cross_attn_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_cross_attn_ln_weight_to_fp16, x = x_243_cast_fp16)[name = string("op_2998_cast_fp16")]; + tensor var_3007_to_fp16 = const()[name = string("op_3007_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501141632)))]; + tensor var_3008_to_fp16 = const()[name = string("op_3008_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503238848)))]; + tensor linear_108_cast_fp16 = linear(bias = var_3008_to_fp16, weight = var_3007_to_fp16, x = var_2998_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor concat_304 = const()[name = string("concat_304"), val = tensor([0, 0, 0])]; + tensor concat_305 = const()[name = string("concat_305"), val = tensor([0, 1500, 0])]; + tensor k_137_internal_tensor_assign_1_stride_0 = const()[name = string("k_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_304, begin_mask = k_137_internal_tensor_assign_1_begin_mask_0, end = concat_305, end_mask = k_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_137_internal_tensor_assign_1_squeeze_mask_0, stride = k_137_internal_tensor_assign_1_stride_0, update = k_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("k_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_306 = const()[name = string("concat_306"), val = tensor([0, 0, 0])]; + tensor concat_307 = const()[name = string("concat_307"), val = tensor([0, 1500, 0])]; + tensor v_137_internal_tensor_assign_1_stride_0 = const()[name = string("v_137_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_137_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_137_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_137_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_137_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_306, begin_mask = v_137_internal_tensor_assign_1_begin_mask_0, end = concat_307, end_mask = v_137_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_137_internal_tensor_assign_1_squeeze_mask_0, stride = v_137_internal_tensor_assign_1_stride_0, update = v_cache_55_cast_fp16, x = k_7_to_fp16)[name = string("v_137_internal_tensor_assign_1_cast_fp16")]; + tensor concat_308x = const()[name = string("concat_308x"), val = tensor([1, -1, 16, 64])]; + tensor var_3028_cast_fp16 = reshape(shape = concat_308x, x = linear_108_cast_fp16)[name = string("op_3028_cast_fp16")]; + tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_111_cast_fp16 = mul(x = var_3028_cast_fp16, y = const_174_to_fp16)[name = string("q_111_cast_fp16")]; + tensor var_3034 = const()[name = string("op_3034"), val = tensor([1, 1500, 16, -1])]; + tensor var_3035_cast_fp16 = reshape(shape = var_3034, x = k_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3035_cast_fp16")]; + tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_139_cast_fp16 = mul(x = var_3035_cast_fp16, y = const_175_to_fp16)[name = string("k_139_cast_fp16")]; + tensor var_3041 = const()[name = string("op_3041"), val = tensor([1, 1500, 16, -1])]; + tensor var_3042_cast_fp16 = reshape(shape = var_3041, x = v_137_internal_tensor_assign_1_cast_fp16)[name = string("op_3042_cast_fp16")]; + tensor var_3043 = const()[name = string("op_3043"), val = tensor([0, 2, 1, 3])]; + bool qk_83_transpose_x_0 = const()[name = string("qk_83_transpose_x_0"), val = bool(false)]; + bool qk_83_transpose_y_0 = const()[name = string("qk_83_transpose_y_0"), val = bool(false)]; + tensor transpose_247_perm_0 = const()[name = string("transpose_247_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_248_perm_0 = const()[name = string("transpose_248_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_248 = transpose(perm = transpose_248_perm_0, x = k_139_cast_fp16)[name = string("transpose_370")]; + tensor transpose_247 = transpose(perm = transpose_247_perm_0, x = q_111_cast_fp16)[name = string("transpose_371")]; + tensor qk_83_cast_fp16 = matmul(transpose_x = qk_83_transpose_x_0, transpose_y = qk_83_transpose_y_0, x = transpose_247, y = transpose_248)[name = string("qk_83_cast_fp16")]; + tensor var_3047_cast_fp16 = softmax(axis = var_2891, x = qk_83_cast_fp16)[name = string("op_3047_cast_fp16")]; + bool var_3049_transpose_x_0 = const()[name = string("op_3049_transpose_x_0"), val = bool(false)]; + bool var_3049_transpose_y_0 = const()[name = string("op_3049_transpose_y_0"), val = bool(false)]; + tensor v_139_cast_fp16 = transpose(perm = var_3043, x = var_3042_cast_fp16)[name = string("transpose_372")]; + tensor var_3049_cast_fp16 = matmul(transpose_x = var_3049_transpose_x_0, transpose_y = var_3049_transpose_y_0, x = var_3047_cast_fp16, y = v_139_cast_fp16)[name = string("op_3049_cast_fp16")]; + tensor var_3050 = const()[name = string("op_3050"), val = tensor([0, 2, 1, 3])]; + tensor concat_309x = const()[name = string("concat_309x"), val = tensor([1, -1, 1024])]; + tensor var_3051_cast_fp16 = transpose(perm = var_3050, x = var_3049_cast_fp16)[name = string("transpose_369")]; + tensor x_247_cast_fp16 = reshape(shape = concat_309x, x = var_3051_cast_fp16)[name = string("x_247_cast_fp16")]; + tensor var_3055_to_fp16 = const()[name = string("op_3055_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(503240960)))]; + tensor var_3056_to_fp16 = const()[name = string("op_3056_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505338176)))]; + tensor linear_109_cast_fp16 = linear(bias = var_3056_to_fp16, weight = var_3055_to_fp16, x = x_247_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor x_249_cast_fp16 = add(x = x_243_cast_fp16, y = linear_109_cast_fp16)[name = string("x_249_cast_fp16")]; + tensor var_3063_axes_0 = const()[name = string("op_3063_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505340288)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505342400)))]; + tensor var_3063_cast_fp16 = layer_norm(axes = var_3063_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_2897_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_249_cast_fp16)[name = string("op_3063_cast_fp16")]; + tensor var_3072_to_fp16 = const()[name = string("op_3072_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505344512)))]; + tensor var_3073_to_fp16 = const()[name = string("op_3073_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513733184)))]; + tensor linear_110_cast_fp16 = linear(bias = var_3073_to_fp16, weight = var_3072_to_fp16, x = var_3063_cast_fp16)[name = string("linear_110_cast_fp16")]; + string x_253_mode_0 = const()[name = string("x_253_mode_0"), val = string("EXACT")]; + tensor x_253_cast_fp16 = gelu(mode = x_253_mode_0, x = linear_110_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_3078_to_fp16 = const()[name = string("op_3078_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513741440)))]; + tensor var_3079_to_fp16 = const()[name = string("op_3079_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522130112)))]; + tensor linear_111_cast_fp16 = linear(bias = var_3079_to_fp16, weight = var_3078_to_fp16, x = x_253_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_255_cast_fp16 = add(x = x_249_cast_fp16, y = linear_111_cast_fp16)[name = string("x_255_cast_fp16")]; + tensor k_cache_57_begin_0 = const()[name = string("k_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_57_end_0 = const()[name = string("k_cache_57_end_0"), val = tensor([15, 1, 448, 1024])]; + tensor k_cache_57_end_mask_0 = const()[name = string("k_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_57_squeeze_mask_0 = const()[name = string("k_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_57_cast_fp16 = slice_by_index(begin = k_cache_57_begin_0, end = k_cache_57_end_0, end_mask = k_cache_57_end_mask_0, squeeze_mask = k_cache_57_squeeze_mask_0, x = coreml_update_state_74)[name = string("k_cache_57_cast_fp16")]; + tensor v_cache_57_begin_0 = const()[name = string("v_cache_57_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_57_end_0 = const()[name = string("v_cache_57_end_0"), val = tensor([15, 1, 448, 1024])]; + tensor v_cache_57_end_mask_0 = const()[name = string("v_cache_57_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_57_squeeze_mask_0 = const()[name = string("v_cache_57_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_57_cast_fp16 = slice_by_index(begin = v_cache_57_begin_0, end = v_cache_57_end_0, end_mask = v_cache_57_end_mask_0, squeeze_mask = v_cache_57_squeeze_mask_0, x = coreml_update_state_75)[name = string("v_cache_57_cast_fp16")]; + tensor k_cache_59_begin_0 = const()[name = string("k_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor k_cache_59_end_0 = const()[name = string("k_cache_59_end_0"), val = tensor([15, 1, 1500, 1024])]; + tensor k_cache_59_end_mask_0 = const()[name = string("k_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_59_squeeze_mask_0 = const()[name = string("k_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_59_cast_fp16 = slice_by_index(begin = k_cache_59_begin_0, end = k_cache_59_end_0, end_mask = k_cache_59_end_mask_0, squeeze_mask = k_cache_59_squeeze_mask_0, x = read_state_2)[name = string("k_cache_59_cast_fp16")]; + tensor v_cache_59_begin_0 = const()[name = string("v_cache_59_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor v_cache_59_end_0 = const()[name = string("v_cache_59_end_0"), val = tensor([15, 1, 1500, 1024])]; + tensor v_cache_59_end_mask_0 = const()[name = string("v_cache_59_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_59_squeeze_mask_0 = const()[name = string("v_cache_59_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_59_cast_fp16 = slice_by_index(begin = v_cache_59_begin_0, end = v_cache_59_end_0, end_mask = v_cache_59_end_mask_0, squeeze_mask = v_cache_59_squeeze_mask_0, x = read_state_3)[name = string("v_cache_59_cast_fp16")]; + int32 var_3102 = const()[name = string("op_3102"), val = int32(-1)]; + tensor var_3120_axes_0 = const()[name = string("op_3120_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522132224)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522134336)))]; + fp16 var_3108_to_fp16 = const()[name = string("op_3108_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3120_cast_fp16 = layer_norm(axes = var_3120_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_255_cast_fp16)[name = string("op_3120_cast_fp16")]; + tensor var_3131_to_fp16 = const()[name = string("op_3131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522136448)))]; + tensor var_3132_to_fp16 = const()[name = string("op_3132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524233664)))]; + tensor linear_112_cast_fp16 = linear(bias = var_3132_to_fp16, weight = var_3131_to_fp16, x = var_3120_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor var_3135_to_fp16 = const()[name = string("op_3135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524235776)))]; + tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3135_to_fp16, x = var_3120_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor var_3139_to_fp16 = const()[name = string("op_3139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526332992)))]; + tensor var_3140_to_fp16 = const()[name = string("op_3140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528430208)))]; + tensor linear_114_cast_fp16 = linear(bias = var_3140_to_fp16, weight = var_3139_to_fp16, x = var_3120_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_3142_shape_cast_fp16 = shape(x = linear_112_cast_fp16)[name = string("op_3142_shape_cast_fp16")]; + int32 gather_170_axis_0 = const()[name = string("gather_170_axis_0"), val = int32(0)]; + int32 gather_170_batch_dims_0 = const()[name = string("gather_170_batch_dims_0"), val = int32(0)]; + bool gather_170_validate_indices_0 = const()[name = string("gather_170_validate_indices_0"), val = bool(false)]; + string var_3142_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3142_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_170_to_uint16 = const()[name = string("select_170_to_uint16"), val = uint16(1)]; + tensor var_3142_shape_cast_fp16_to_uint16 = cast(dtype = var_3142_shape_cast_fp16_to_uint16_dtype_0, x = var_3142_shape_cast_fp16)[name = string("cast_266")]; + uint16 gather_170_cast_uint16 = gather(axis = gather_170_axis_0, batch_dims = gather_170_batch_dims_0, indices = select_170_to_uint16, validate_indices = gather_170_validate_indices_0, x = var_3142_shape_cast_fp16_to_uint16)[name = string("gather_170_cast_uint16")]; + string gather_170_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_170_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_170_cast_uint16_to_int32 = cast(dtype = gather_170_cast_uint16_to_int32_dtype_0, x = gather_170_cast_uint16)[name = string("cast_265")]; + int32 end_step_31 = add(x = offset, y = gather_170_cast_uint16_to_int32)[name = string("end_step_31")]; + tensor expand_dims_224 = const()[name = string("expand_dims_224"), val = tensor([0])]; + tensor expand_dims_226 = const()[name = string("expand_dims_226"), val = tensor([0])]; + tensor expand_dims_227_axes_0 = const()[name = string("expand_dims_227_axes_0"), val = tensor([0])]; + tensor expand_dims_227 = expand_dims(axes = expand_dims_227_axes_0, x = end_step_31)[name = string("expand_dims_227")]; + tensor concat_312_values0_0 = const()[name = string("concat_312_values0_0"), val = tensor([14])]; + int32 concat_312_axis_0 = const()[name = string("concat_312_axis_0"), val = int32(0)]; + bool concat_312_interleave_0 = const()[name = string("concat_312_interleave_0"), val = bool(false)]; + tensor concat_312 = concat(axis = concat_312_axis_0, interleave = concat_312_interleave_0, values = (concat_312_values0_0, expand_dims_224, expand_dims_1, expand_dims_226))[name = string("concat_312")]; + tensor concat_313_values0_0 = const()[name = string("concat_313_values0_0"), val = tensor([0])]; + tensor concat_313_values1_0 = const()[name = string("concat_313_values1_0"), val = tensor([0])]; + tensor concat_313_values3_0 = const()[name = string("concat_313_values3_0"), val = tensor([0])]; + int32 concat_313_axis_0 = const()[name = string("concat_313_axis_0"), val = int32(0)]; + bool concat_313_interleave_0 = const()[name = string("concat_313_interleave_0"), val = bool(false)]; + tensor concat_313 = concat(axis = concat_313_axis_0, interleave = concat_313_interleave_0, values = (concat_313_values0_0, concat_313_values1_0, expand_dims_227, concat_313_values3_0))[name = string("concat_313")]; + tensor k_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = k_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = k_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_15_stride_0, update = linear_113_cast_fp16, x = coreml_update_state_74)[name = string("k_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_15_cast_fp16, input = k_cache1)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = k_cache1)[name = string("coreml_update_state_76")]; + tensor v_cache1_internal_tensor_assign_15_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_15_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_312, begin_mask = v_cache1_internal_tensor_assign_15_begin_mask_0, end = concat_313, end_mask = v_cache1_internal_tensor_assign_15_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_15_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_15_stride_0, update = linear_114_cast_fp16, x = coreml_update_state_75)[name = string("v_cache1_internal_tensor_assign_15_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_15_cast_fp16, input = v_cache1)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = v_cache1)[name = string("coreml_update_state_77")]; + int32 concat_318_values0_0 = const()[name = string("concat_318_values0_0"), val = int32(1)]; + int32 concat_318_values2_0 = const()[name = string("concat_318_values2_0"), val = int32(1024)]; + int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; + bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; + tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (concat_318_values0_0, end_step_31, concat_318_values2_0))[name = string("concat_318")]; + tensor var_3158_begin_0 = const()[name = string("op_3158_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3158_end_mask_0 = const()[name = string("op_3158_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3158_cast_fp16 = slice_by_index(begin = var_3158_begin_0, end = concat_318, end_mask = var_3158_end_mask_0, x = k_cache_57_cast_fp16)[name = string("op_3158_cast_fp16")]; + tensor var_3161_begin_0 = const()[name = string("op_3161_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3161_end_mask_0 = const()[name = string("op_3161_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3161_cast_fp16 = slice_by_index(begin = var_3161_begin_0, end = concat_318, end_mask = var_3161_end_mask_0, x = v_cache_57_cast_fp16)[name = string("op_3161_cast_fp16")]; + tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, -1, 16, 64])]; + tensor var_3171_cast_fp16 = reshape(shape = concat_320x, x = linear_112_cast_fp16)[name = string("op_3171_cast_fp16")]; + tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_115_cast_fp16 = mul(x = var_3171_cast_fp16, y = const_176_to_fp16)[name = string("q_115_cast_fp16")]; + tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, -1, 16, 64])]; + tensor var_3178_cast_fp16 = reshape(shape = concat_321x, x = var_3158_cast_fp16)[name = string("op_3178_cast_fp16")]; + tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_145_cast_fp16 = mul(x = var_3178_cast_fp16, y = const_177_to_fp16)[name = string("k_145_cast_fp16")]; + tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 16, 64])]; + tensor var_3185_cast_fp16 = reshape(shape = concat_322x, x = var_3161_cast_fp16)[name = string("op_3185_cast_fp16")]; + tensor var_3186 = const()[name = string("op_3186"), val = tensor([0, 2, 1, 3])]; + bool qk_85_transpose_x_0 = const()[name = string("qk_85_transpose_x_0"), val = bool(false)]; + bool qk_85_transpose_y_0 = const()[name = string("qk_85_transpose_y_0"), val = bool(false)]; + tensor transpose_249_perm_0 = const()[name = string("transpose_249_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_250_perm_0 = const()[name = string("transpose_250_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_250 = transpose(perm = transpose_250_perm_0, x = k_145_cast_fp16)[name = string("transpose_366")]; + tensor transpose_249 = transpose(perm = transpose_249_perm_0, x = q_115_cast_fp16)[name = string("transpose_367")]; + tensor qk_85_cast_fp16 = matmul(transpose_x = qk_85_transpose_x_0, transpose_y = qk_85_transpose_y_0, x = transpose_249, y = transpose_250)[name = string("qk_85_cast_fp16")]; + int32 concat_323_values1_0 = const()[name = string("concat_323_values1_0"), val = int32(448)]; + int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; + bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; + tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_170_cast_uint16_to_int32, concat_323_values1_0))[name = string("concat_323")]; + tensor var_3189_begin_0 = const()[name = string("op_3189_begin_0"), val = tensor([0, 0])]; + tensor var_3189_end_mask_0 = const()[name = string("op_3189_end_mask_0"), val = tensor([false, true])]; + tensor var_3189_cast_fp16 = slice_by_index(begin = var_3189_begin_0, end = concat_323, end_mask = var_3189_end_mask_0, x = mask_to_fp16)[name = string("op_3189_cast_fp16")]; + int32 concat_324_values0_0 = const()[name = string("concat_324_values0_0"), val = int32(0)]; + int32 concat_324_axis_0 = const()[name = string("concat_324_axis_0"), val = int32(0)]; + bool concat_324_interleave_0 = const()[name = string("concat_324_interleave_0"), val = bool(false)]; + tensor concat_324 = concat(axis = concat_324_axis_0, interleave = concat_324_interleave_0, values = (concat_324_values0_0, gather_170_cast_uint16_to_int32))[name = string("concat_324")]; + tensor var_3190_begin_0 = const()[name = string("op_3190_begin_0"), val = tensor([0, 0])]; + tensor var_3190_end_mask_0 = const()[name = string("op_3190_end_mask_0"), val = tensor([true, false])]; + tensor var_3190_cast_fp16 = slice_by_index(begin = var_3190_begin_0, end = concat_324, end_mask = var_3190_end_mask_0, x = var_3189_cast_fp16)[name = string("op_3190_cast_fp16")]; + tensor qk_87_cast_fp16 = add(x = qk_85_cast_fp16, y = var_3190_cast_fp16)[name = string("qk_87_cast_fp16")]; + tensor var_3193_cast_fp16 = softmax(axis = var_3102, x = qk_87_cast_fp16)[name = string("op_3193_cast_fp16")]; + bool var_3195_transpose_x_0 = const()[name = string("op_3195_transpose_x_0"), val = bool(false)]; + bool var_3195_transpose_y_0 = const()[name = string("op_3195_transpose_y_0"), val = bool(false)]; + tensor v_145_cast_fp16 = transpose(perm = var_3186, x = var_3185_cast_fp16)[name = string("transpose_368")]; + tensor var_3195_cast_fp16 = matmul(transpose_x = var_3195_transpose_x_0, transpose_y = var_3195_transpose_y_0, x = var_3193_cast_fp16, y = v_145_cast_fp16)[name = string("op_3195_cast_fp16")]; + tensor var_3196 = const()[name = string("op_3196"), val = tensor([0, 2, 1, 3])]; + tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 1024])]; + tensor var_3197_cast_fp16 = transpose(perm = var_3196, x = var_3195_cast_fp16)[name = string("transpose_365")]; + tensor x_259_cast_fp16 = reshape(shape = concat_325x, x = var_3197_cast_fp16)[name = string("x_259_cast_fp16")]; + tensor var_3201_to_fp16 = const()[name = string("op_3201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528432320)))]; + tensor var_3202_to_fp16 = const()[name = string("op_3202_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530529536)))]; + tensor linear_115_cast_fp16 = linear(bias = var_3202_to_fp16, weight = var_3201_to_fp16, x = x_259_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor x_261_cast_fp16 = add(x = x_255_cast_fp16, y = linear_115_cast_fp16)[name = string("x_261_cast_fp16")]; + tensor var_3209_axes_0 = const()[name = string("op_3209_axes_0"), val = tensor([-1])]; + tensor blocks_14_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530531648)))]; + tensor blocks_14_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530533760)))]; + tensor var_3209_cast_fp16 = layer_norm(axes = var_3209_axes_0, beta = blocks_14_cross_attn_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_cross_attn_ln_weight_to_fp16, x = x_261_cast_fp16)[name = string("op_3209_cast_fp16")]; + tensor var_3218_to_fp16 = const()[name = string("op_3218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530535872)))]; + tensor var_3219_to_fp16 = const()[name = string("op_3219_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532633088)))]; + tensor linear_116_cast_fp16 = linear(bias = var_3219_to_fp16, weight = var_3218_to_fp16, x = var_3209_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor concat_326 = const()[name = string("concat_326"), val = tensor([0, 0, 0])]; + tensor concat_327 = const()[name = string("concat_327"), val = tensor([0, 1500, 0])]; + tensor k_147_internal_tensor_assign_1_stride_0 = const()[name = string("k_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_326, begin_mask = k_147_internal_tensor_assign_1_begin_mask_0, end = concat_327, end_mask = k_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_147_internal_tensor_assign_1_squeeze_mask_0, stride = k_147_internal_tensor_assign_1_stride_0, update = k_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("k_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_328 = const()[name = string("concat_328"), val = tensor([0, 0, 0])]; + tensor concat_329 = const()[name = string("concat_329"), val = tensor([0, 1500, 0])]; + tensor v_147_internal_tensor_assign_1_stride_0 = const()[name = string("v_147_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_147_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_147_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_147_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_147_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_328, begin_mask = v_147_internal_tensor_assign_1_begin_mask_0, end = concat_329, end_mask = v_147_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_147_internal_tensor_assign_1_squeeze_mask_0, stride = v_147_internal_tensor_assign_1_stride_0, update = v_cache_59_cast_fp16, x = k_7_to_fp16)[name = string("v_147_internal_tensor_assign_1_cast_fp16")]; + tensor concat_330x = const()[name = string("concat_330x"), val = tensor([1, -1, 16, 64])]; + tensor var_3239_cast_fp16 = reshape(shape = concat_330x, x = linear_116_cast_fp16)[name = string("op_3239_cast_fp16")]; + tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_119_cast_fp16 = mul(x = var_3239_cast_fp16, y = const_178_to_fp16)[name = string("q_119_cast_fp16")]; + tensor var_3245 = const()[name = string("op_3245"), val = tensor([1, 1500, 16, -1])]; + tensor var_3246_cast_fp16 = reshape(shape = var_3245, x = k_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3246_cast_fp16")]; + tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_149_cast_fp16 = mul(x = var_3246_cast_fp16, y = const_179_to_fp16)[name = string("k_149_cast_fp16")]; + tensor var_3252 = const()[name = string("op_3252"), val = tensor([1, 1500, 16, -1])]; + tensor var_3253_cast_fp16 = reshape(shape = var_3252, x = v_147_internal_tensor_assign_1_cast_fp16)[name = string("op_3253_cast_fp16")]; + tensor var_3254 = const()[name = string("op_3254"), val = tensor([0, 2, 1, 3])]; + bool qk_89_transpose_x_0 = const()[name = string("qk_89_transpose_x_0"), val = bool(false)]; + bool qk_89_transpose_y_0 = const()[name = string("qk_89_transpose_y_0"), val = bool(false)]; + tensor transpose_251_perm_0 = const()[name = string("transpose_251_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_252_perm_0 = const()[name = string("transpose_252_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_252 = transpose(perm = transpose_252_perm_0, x = k_149_cast_fp16)[name = string("transpose_362")]; + tensor transpose_251 = transpose(perm = transpose_251_perm_0, x = q_119_cast_fp16)[name = string("transpose_363")]; + tensor qk_89_cast_fp16 = matmul(transpose_x = qk_89_transpose_x_0, transpose_y = qk_89_transpose_y_0, x = transpose_251, y = transpose_252)[name = string("qk_89_cast_fp16")]; + tensor var_3258_cast_fp16 = softmax(axis = var_3102, x = qk_89_cast_fp16)[name = string("op_3258_cast_fp16")]; + bool var_3260_transpose_x_0 = const()[name = string("op_3260_transpose_x_0"), val = bool(false)]; + bool var_3260_transpose_y_0 = const()[name = string("op_3260_transpose_y_0"), val = bool(false)]; + tensor v_149_cast_fp16 = transpose(perm = var_3254, x = var_3253_cast_fp16)[name = string("transpose_364")]; + tensor var_3260_cast_fp16 = matmul(transpose_x = var_3260_transpose_x_0, transpose_y = var_3260_transpose_y_0, x = var_3258_cast_fp16, y = v_149_cast_fp16)[name = string("op_3260_cast_fp16")]; + tensor var_3261 = const()[name = string("op_3261"), val = tensor([0, 2, 1, 3])]; + tensor concat_331x = const()[name = string("concat_331x"), val = tensor([1, -1, 1024])]; + tensor var_3262_cast_fp16 = transpose(perm = var_3261, x = var_3260_cast_fp16)[name = string("transpose_361")]; + tensor x_265_cast_fp16 = reshape(shape = concat_331x, x = var_3262_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_3266_to_fp16 = const()[name = string("op_3266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(532635200)))]; + tensor var_3267_to_fp16 = const()[name = string("op_3267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534732416)))]; + tensor linear_117_cast_fp16 = linear(bias = var_3267_to_fp16, weight = var_3266_to_fp16, x = x_265_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_267_cast_fp16 = add(x = x_261_cast_fp16, y = linear_117_cast_fp16)[name = string("x_267_cast_fp16")]; + tensor var_3274_axes_0 = const()[name = string("op_3274_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534734528)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534736640)))]; + tensor var_3274_cast_fp16 = layer_norm(axes = var_3274_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_3108_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_267_cast_fp16)[name = string("op_3274_cast_fp16")]; + tensor var_3283_to_fp16 = const()[name = string("op_3283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(534738752)))]; + tensor var_3284_to_fp16 = const()[name = string("op_3284_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543127424)))]; + tensor linear_118_cast_fp16 = linear(bias = var_3284_to_fp16, weight = var_3283_to_fp16, x = var_3274_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_271_mode_0 = const()[name = string("x_271_mode_0"), val = string("EXACT")]; + tensor x_271_cast_fp16 = gelu(mode = x_271_mode_0, x = linear_118_cast_fp16)[name = string("x_271_cast_fp16")]; + tensor var_3289_to_fp16 = const()[name = string("op_3289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543135680)))]; + tensor var_3290_to_fp16 = const()[name = string("op_3290_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551524352)))]; + tensor linear_119_cast_fp16 = linear(bias = var_3290_to_fp16, weight = var_3289_to_fp16, x = x_271_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_273_cast_fp16 = add(x = x_267_cast_fp16, y = linear_119_cast_fp16)[name = string("x_273_cast_fp16")]; + tensor k_cache_61_begin_0 = const()[name = string("k_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_61_end_0 = const()[name = string("k_cache_61_end_0"), val = tensor([16, 1, 448, 1024])]; + tensor k_cache_61_end_mask_0 = const()[name = string("k_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_61_squeeze_mask_0 = const()[name = string("k_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_61_cast_fp16 = slice_by_index(begin = k_cache_61_begin_0, end = k_cache_61_end_0, end_mask = k_cache_61_end_mask_0, squeeze_mask = k_cache_61_squeeze_mask_0, x = coreml_update_state_76)[name = string("k_cache_61_cast_fp16")]; + tensor v_cache_61_begin_0 = const()[name = string("v_cache_61_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_61_end_0 = const()[name = string("v_cache_61_end_0"), val = tensor([16, 1, 448, 1024])]; + tensor v_cache_61_end_mask_0 = const()[name = string("v_cache_61_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_61_squeeze_mask_0 = const()[name = string("v_cache_61_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_61_cast_fp16 = slice_by_index(begin = v_cache_61_begin_0, end = v_cache_61_end_0, end_mask = v_cache_61_end_mask_0, squeeze_mask = v_cache_61_squeeze_mask_0, x = coreml_update_state_77)[name = string("v_cache_61_cast_fp16")]; + tensor k_cache_63_begin_0 = const()[name = string("k_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor k_cache_63_end_0 = const()[name = string("k_cache_63_end_0"), val = tensor([16, 1, 1500, 1024])]; + tensor k_cache_63_end_mask_0 = const()[name = string("k_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_63_squeeze_mask_0 = const()[name = string("k_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_63_cast_fp16 = slice_by_index(begin = k_cache_63_begin_0, end = k_cache_63_end_0, end_mask = k_cache_63_end_mask_0, squeeze_mask = k_cache_63_squeeze_mask_0, x = read_state_2)[name = string("k_cache_63_cast_fp16")]; + tensor v_cache_63_begin_0 = const()[name = string("v_cache_63_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor v_cache_63_end_0 = const()[name = string("v_cache_63_end_0"), val = tensor([16, 1, 1500, 1024])]; + tensor v_cache_63_end_mask_0 = const()[name = string("v_cache_63_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_63_squeeze_mask_0 = const()[name = string("v_cache_63_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_63_cast_fp16 = slice_by_index(begin = v_cache_63_begin_0, end = v_cache_63_end_0, end_mask = v_cache_63_end_mask_0, squeeze_mask = v_cache_63_squeeze_mask_0, x = read_state_3)[name = string("v_cache_63_cast_fp16")]; + int32 var_3313 = const()[name = string("op_3313"), val = int32(-1)]; + tensor var_3331_axes_0 = const()[name = string("op_3331_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551526464)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551528576)))]; + fp16 var_3319_to_fp16 = const()[name = string("op_3319_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3331_cast_fp16 = layer_norm(axes = var_3331_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_273_cast_fp16)[name = string("op_3331_cast_fp16")]; + tensor var_3342_to_fp16 = const()[name = string("op_3342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551530688)))]; + tensor var_3343_to_fp16 = const()[name = string("op_3343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553627904)))]; + tensor linear_120_cast_fp16 = linear(bias = var_3343_to_fp16, weight = var_3342_to_fp16, x = var_3331_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_3346_to_fp16 = const()[name = string("op_3346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(553630016)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3346_to_fp16, x = var_3331_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_3350_to_fp16 = const()[name = string("op_3350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555727232)))]; + tensor var_3351_to_fp16 = const()[name = string("op_3351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557824448)))]; + tensor linear_122_cast_fp16 = linear(bias = var_3351_to_fp16, weight = var_3350_to_fp16, x = var_3331_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_3353_shape_cast_fp16 = shape(x = linear_120_cast_fp16)[name = string("op_3353_shape_cast_fp16")]; + int32 gather_182_axis_0 = const()[name = string("gather_182_axis_0"), val = int32(0)]; + int32 gather_182_batch_dims_0 = const()[name = string("gather_182_batch_dims_0"), val = int32(0)]; + bool gather_182_validate_indices_0 = const()[name = string("gather_182_validate_indices_0"), val = bool(false)]; + string var_3353_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3353_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_182_to_uint16 = const()[name = string("select_182_to_uint16"), val = uint16(1)]; + tensor var_3353_shape_cast_fp16_to_uint16 = cast(dtype = var_3353_shape_cast_fp16_to_uint16_dtype_0, x = var_3353_shape_cast_fp16)[name = string("cast_264")]; + uint16 gather_182_cast_uint16 = gather(axis = gather_182_axis_0, batch_dims = gather_182_batch_dims_0, indices = select_182_to_uint16, validate_indices = gather_182_validate_indices_0, x = var_3353_shape_cast_fp16_to_uint16)[name = string("gather_182_cast_uint16")]; + string gather_182_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_182_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_182_cast_uint16_to_int32 = cast(dtype = gather_182_cast_uint16_to_int32_dtype_0, x = gather_182_cast_uint16)[name = string("cast_263")]; + int32 end_step_33 = add(x = offset, y = gather_182_cast_uint16_to_int32)[name = string("end_step_33")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_242 = const()[name = string("expand_dims_242"), val = tensor([0])]; + tensor expand_dims_243_axes_0 = const()[name = string("expand_dims_243_axes_0"), val = tensor([0])]; + tensor expand_dims_243 = expand_dims(axes = expand_dims_243_axes_0, x = end_step_33)[name = string("expand_dims_243")]; + tensor concat_334_values0_0 = const()[name = string("concat_334_values0_0"), val = tensor([15])]; + int32 concat_334_axis_0 = const()[name = string("concat_334_axis_0"), val = int32(0)]; + bool concat_334_interleave_0 = const()[name = string("concat_334_interleave_0"), val = bool(false)]; + tensor concat_334 = concat(axis = concat_334_axis_0, interleave = concat_334_interleave_0, values = (concat_334_values0_0, expand_dims_240, expand_dims_1, expand_dims_242))[name = string("concat_334")]; + tensor concat_335_values0_0 = const()[name = string("concat_335_values0_0"), val = tensor([0])]; + tensor concat_335_values1_0 = const()[name = string("concat_335_values1_0"), val = tensor([0])]; + tensor concat_335_values3_0 = const()[name = string("concat_335_values3_0"), val = tensor([0])]; + int32 concat_335_axis_0 = const()[name = string("concat_335_axis_0"), val = int32(0)]; + bool concat_335_interleave_0 = const()[name = string("concat_335_interleave_0"), val = bool(false)]; + tensor concat_335 = concat(axis = concat_335_axis_0, interleave = concat_335_interleave_0, values = (concat_335_values0_0, concat_335_values1_0, expand_dims_243, concat_335_values3_0))[name = string("concat_335")]; + tensor k_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = k_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = k_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_16_stride_0, update = linear_121_cast_fp16, x = coreml_update_state_76)[name = string("k_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_16_cast_fp16, input = k_cache1)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = k_cache1)[name = string("coreml_update_state_78")]; + tensor v_cache1_internal_tensor_assign_16_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_16_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_334, begin_mask = v_cache1_internal_tensor_assign_16_begin_mask_0, end = concat_335, end_mask = v_cache1_internal_tensor_assign_16_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_16_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_16_stride_0, update = linear_122_cast_fp16, x = coreml_update_state_77)[name = string("v_cache1_internal_tensor_assign_16_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_16_cast_fp16, input = v_cache1)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = v_cache1)[name = string("coreml_update_state_79")]; + int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; + int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(1024)]; + int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; + bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; + tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, end_step_33, concat_340_values2_0))[name = string("concat_340")]; + tensor var_3369_begin_0 = const()[name = string("op_3369_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3369_end_mask_0 = const()[name = string("op_3369_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3369_cast_fp16 = slice_by_index(begin = var_3369_begin_0, end = concat_340, end_mask = var_3369_end_mask_0, x = k_cache_61_cast_fp16)[name = string("op_3369_cast_fp16")]; + tensor var_3372_begin_0 = const()[name = string("op_3372_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3372_end_mask_0 = const()[name = string("op_3372_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3372_cast_fp16 = slice_by_index(begin = var_3372_begin_0, end = concat_340, end_mask = var_3372_end_mask_0, x = v_cache_61_cast_fp16)[name = string("op_3372_cast_fp16")]; + tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 16, 64])]; + tensor var_3382_cast_fp16 = reshape(shape = concat_342x, x = linear_120_cast_fp16)[name = string("op_3382_cast_fp16")]; + tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_123_cast_fp16 = mul(x = var_3382_cast_fp16, y = const_180_to_fp16)[name = string("q_123_cast_fp16")]; + tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 16, 64])]; + tensor var_3389_cast_fp16 = reshape(shape = concat_343x, x = var_3369_cast_fp16)[name = string("op_3389_cast_fp16")]; + tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_155_cast_fp16 = mul(x = var_3389_cast_fp16, y = const_181_to_fp16)[name = string("k_155_cast_fp16")]; + tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 16, 64])]; + tensor var_3396_cast_fp16 = reshape(shape = concat_344x, x = var_3372_cast_fp16)[name = string("op_3396_cast_fp16")]; + tensor var_3397 = const()[name = string("op_3397"), val = tensor([0, 2, 1, 3])]; + bool qk_91_transpose_x_0 = const()[name = string("qk_91_transpose_x_0"), val = bool(false)]; + bool qk_91_transpose_y_0 = const()[name = string("qk_91_transpose_y_0"), val = bool(false)]; + tensor transpose_253_perm_0 = const()[name = string("transpose_253_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_254_perm_0 = const()[name = string("transpose_254_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_254 = transpose(perm = transpose_254_perm_0, x = k_155_cast_fp16)[name = string("transpose_358")]; + tensor transpose_253 = transpose(perm = transpose_253_perm_0, x = q_123_cast_fp16)[name = string("transpose_359")]; + tensor qk_91_cast_fp16 = matmul(transpose_x = qk_91_transpose_x_0, transpose_y = qk_91_transpose_y_0, x = transpose_253, y = transpose_254)[name = string("qk_91_cast_fp16")]; + int32 concat_345_values1_0 = const()[name = string("concat_345_values1_0"), val = int32(448)]; + int32 concat_345_axis_0 = const()[name = string("concat_345_axis_0"), val = int32(0)]; + bool concat_345_interleave_0 = const()[name = string("concat_345_interleave_0"), val = bool(false)]; + tensor concat_345 = concat(axis = concat_345_axis_0, interleave = concat_345_interleave_0, values = (gather_182_cast_uint16_to_int32, concat_345_values1_0))[name = string("concat_345")]; + tensor var_3400_begin_0 = const()[name = string("op_3400_begin_0"), val = tensor([0, 0])]; + tensor var_3400_end_mask_0 = const()[name = string("op_3400_end_mask_0"), val = tensor([false, true])]; + tensor var_3400_cast_fp16 = slice_by_index(begin = var_3400_begin_0, end = concat_345, end_mask = var_3400_end_mask_0, x = mask_to_fp16)[name = string("op_3400_cast_fp16")]; + int32 concat_346_values0_0 = const()[name = string("concat_346_values0_0"), val = int32(0)]; + int32 concat_346_axis_0 = const()[name = string("concat_346_axis_0"), val = int32(0)]; + bool concat_346_interleave_0 = const()[name = string("concat_346_interleave_0"), val = bool(false)]; + tensor concat_346 = concat(axis = concat_346_axis_0, interleave = concat_346_interleave_0, values = (concat_346_values0_0, gather_182_cast_uint16_to_int32))[name = string("concat_346")]; + tensor var_3401_begin_0 = const()[name = string("op_3401_begin_0"), val = tensor([0, 0])]; + tensor var_3401_end_mask_0 = const()[name = string("op_3401_end_mask_0"), val = tensor([true, false])]; + tensor var_3401_cast_fp16 = slice_by_index(begin = var_3401_begin_0, end = concat_346, end_mask = var_3401_end_mask_0, x = var_3400_cast_fp16)[name = string("op_3401_cast_fp16")]; + tensor qk_93_cast_fp16 = add(x = qk_91_cast_fp16, y = var_3401_cast_fp16)[name = string("qk_93_cast_fp16")]; + tensor var_3404_cast_fp16 = softmax(axis = var_3313, x = qk_93_cast_fp16)[name = string("op_3404_cast_fp16")]; + bool var_3406_transpose_x_0 = const()[name = string("op_3406_transpose_x_0"), val = bool(false)]; + bool var_3406_transpose_y_0 = const()[name = string("op_3406_transpose_y_0"), val = bool(false)]; + tensor v_155_cast_fp16 = transpose(perm = var_3397, x = var_3396_cast_fp16)[name = string("transpose_360")]; + tensor var_3406_cast_fp16 = matmul(transpose_x = var_3406_transpose_x_0, transpose_y = var_3406_transpose_y_0, x = var_3404_cast_fp16, y = v_155_cast_fp16)[name = string("op_3406_cast_fp16")]; + tensor var_3407 = const()[name = string("op_3407"), val = tensor([0, 2, 1, 3])]; + tensor concat_347x = const()[name = string("concat_347x"), val = tensor([1, -1, 1024])]; + tensor var_3408_cast_fp16 = transpose(perm = var_3407, x = var_3406_cast_fp16)[name = string("transpose_357")]; + tensor x_277_cast_fp16 = reshape(shape = concat_347x, x = var_3408_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_3412_to_fp16 = const()[name = string("op_3412_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557826560)))]; + tensor var_3413_to_fp16 = const()[name = string("op_3413_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559923776)))]; + tensor linear_123_cast_fp16 = linear(bias = var_3413_to_fp16, weight = var_3412_to_fp16, x = x_277_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_279_cast_fp16 = add(x = x_273_cast_fp16, y = linear_123_cast_fp16)[name = string("x_279_cast_fp16")]; + tensor var_3420_axes_0 = const()[name = string("op_3420_axes_0"), val = tensor([-1])]; + tensor blocks_15_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559925888)))]; + tensor blocks_15_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559928000)))]; + tensor var_3420_cast_fp16 = layer_norm(axes = var_3420_axes_0, beta = blocks_15_cross_attn_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_cross_attn_ln_weight_to_fp16, x = x_279_cast_fp16)[name = string("op_3420_cast_fp16")]; + tensor var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(559930112)))]; + tensor var_3430_to_fp16 = const()[name = string("op_3430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562027328)))]; + tensor linear_124_cast_fp16 = linear(bias = var_3430_to_fp16, weight = var_3429_to_fp16, x = var_3420_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor concat_348 = const()[name = string("concat_348"), val = tensor([0, 0, 0])]; + tensor concat_349 = const()[name = string("concat_349"), val = tensor([0, 1500, 0])]; + tensor k_157_internal_tensor_assign_1_stride_0 = const()[name = string("k_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_348, begin_mask = k_157_internal_tensor_assign_1_begin_mask_0, end = concat_349, end_mask = k_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_157_internal_tensor_assign_1_squeeze_mask_0, stride = k_157_internal_tensor_assign_1_stride_0, update = k_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("k_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_350 = const()[name = string("concat_350"), val = tensor([0, 0, 0])]; + tensor concat_351 = const()[name = string("concat_351"), val = tensor([0, 1500, 0])]; + tensor v_157_internal_tensor_assign_1_stride_0 = const()[name = string("v_157_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_157_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_157_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_157_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_157_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_350, begin_mask = v_157_internal_tensor_assign_1_begin_mask_0, end = concat_351, end_mask = v_157_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_157_internal_tensor_assign_1_squeeze_mask_0, stride = v_157_internal_tensor_assign_1_stride_0, update = v_cache_63_cast_fp16, x = k_7_to_fp16)[name = string("v_157_internal_tensor_assign_1_cast_fp16")]; + tensor concat_352x = const()[name = string("concat_352x"), val = tensor([1, -1, 16, 64])]; + tensor var_3450_cast_fp16 = reshape(shape = concat_352x, x = linear_124_cast_fp16)[name = string("op_3450_cast_fp16")]; + tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_127_cast_fp16 = mul(x = var_3450_cast_fp16, y = const_182_to_fp16)[name = string("q_127_cast_fp16")]; + tensor var_3456 = const()[name = string("op_3456"), val = tensor([1, 1500, 16, -1])]; + tensor var_3457_cast_fp16 = reshape(shape = var_3456, x = k_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3457_cast_fp16")]; + tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_159_cast_fp16 = mul(x = var_3457_cast_fp16, y = const_183_to_fp16)[name = string("k_159_cast_fp16")]; + tensor var_3463 = const()[name = string("op_3463"), val = tensor([1, 1500, 16, -1])]; + tensor var_3464_cast_fp16 = reshape(shape = var_3463, x = v_157_internal_tensor_assign_1_cast_fp16)[name = string("op_3464_cast_fp16")]; + tensor var_3465 = const()[name = string("op_3465"), val = tensor([0, 2, 1, 3])]; + bool qk_95_transpose_x_0 = const()[name = string("qk_95_transpose_x_0"), val = bool(false)]; + bool qk_95_transpose_y_0 = const()[name = string("qk_95_transpose_y_0"), val = bool(false)]; + tensor transpose_255_perm_0 = const()[name = string("transpose_255_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_256_perm_0 = const()[name = string("transpose_256_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_256 = transpose(perm = transpose_256_perm_0, x = k_159_cast_fp16)[name = string("transpose_354")]; + tensor transpose_255 = transpose(perm = transpose_255_perm_0, x = q_127_cast_fp16)[name = string("transpose_355")]; + tensor qk_95_cast_fp16 = matmul(transpose_x = qk_95_transpose_x_0, transpose_y = qk_95_transpose_y_0, x = transpose_255, y = transpose_256)[name = string("qk_95_cast_fp16")]; + tensor var_3469_cast_fp16 = softmax(axis = var_3313, x = qk_95_cast_fp16)[name = string("op_3469_cast_fp16")]; + bool var_3471_transpose_x_0 = const()[name = string("op_3471_transpose_x_0"), val = bool(false)]; + bool var_3471_transpose_y_0 = const()[name = string("op_3471_transpose_y_0"), val = bool(false)]; + tensor v_159_cast_fp16 = transpose(perm = var_3465, x = var_3464_cast_fp16)[name = string("transpose_356")]; + tensor var_3471_cast_fp16 = matmul(transpose_x = var_3471_transpose_x_0, transpose_y = var_3471_transpose_y_0, x = var_3469_cast_fp16, y = v_159_cast_fp16)[name = string("op_3471_cast_fp16")]; + tensor var_3472 = const()[name = string("op_3472"), val = tensor([0, 2, 1, 3])]; + tensor concat_353x = const()[name = string("concat_353x"), val = tensor([1, -1, 1024])]; + tensor var_3473_cast_fp16 = transpose(perm = var_3472, x = var_3471_cast_fp16)[name = string("transpose_353")]; + tensor x_283_cast_fp16 = reshape(shape = concat_353x, x = var_3473_cast_fp16)[name = string("x_283_cast_fp16")]; + tensor var_3477_to_fp16 = const()[name = string("op_3477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562029440)))]; + tensor var_3478_to_fp16 = const()[name = string("op_3478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564126656)))]; + tensor linear_125_cast_fp16 = linear(bias = var_3478_to_fp16, weight = var_3477_to_fp16, x = x_283_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_125_cast_fp16)[name = string("x_285_cast_fp16")]; + tensor var_3485_axes_0 = const()[name = string("op_3485_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564128768)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564130880)))]; + tensor var_3485_cast_fp16 = layer_norm(axes = var_3485_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_3319_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_285_cast_fp16)[name = string("op_3485_cast_fp16")]; + tensor var_3494_to_fp16 = const()[name = string("op_3494_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564132992)))]; + tensor var_3495_to_fp16 = const()[name = string("op_3495_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572521664)))]; + tensor linear_126_cast_fp16 = linear(bias = var_3495_to_fp16, weight = var_3494_to_fp16, x = var_3485_cast_fp16)[name = string("linear_126_cast_fp16")]; + string x_289_mode_0 = const()[name = string("x_289_mode_0"), val = string("EXACT")]; + tensor x_289_cast_fp16 = gelu(mode = x_289_mode_0, x = linear_126_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_3500_to_fp16 = const()[name = string("op_3500_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572529920)))]; + tensor var_3501_to_fp16 = const()[name = string("op_3501_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580918592)))]; + tensor linear_127_cast_fp16 = linear(bias = var_3501_to_fp16, weight = var_3500_to_fp16, x = x_289_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor x_291_cast_fp16 = add(x = x_285_cast_fp16, y = linear_127_cast_fp16)[name = string("x_291_cast_fp16")]; + tensor k_cache_65_begin_0 = const()[name = string("k_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_65_end_0 = const()[name = string("k_cache_65_end_0"), val = tensor([17, 1, 448, 1024])]; + tensor k_cache_65_end_mask_0 = const()[name = string("k_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_65_squeeze_mask_0 = const()[name = string("k_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_65_cast_fp16 = slice_by_index(begin = k_cache_65_begin_0, end = k_cache_65_end_0, end_mask = k_cache_65_end_mask_0, squeeze_mask = k_cache_65_squeeze_mask_0, x = coreml_update_state_78)[name = string("k_cache_65_cast_fp16")]; + tensor v_cache_65_begin_0 = const()[name = string("v_cache_65_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_65_end_0 = const()[name = string("v_cache_65_end_0"), val = tensor([17, 1, 448, 1024])]; + tensor v_cache_65_end_mask_0 = const()[name = string("v_cache_65_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_65_squeeze_mask_0 = const()[name = string("v_cache_65_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_65_cast_fp16 = slice_by_index(begin = v_cache_65_begin_0, end = v_cache_65_end_0, end_mask = v_cache_65_end_mask_0, squeeze_mask = v_cache_65_squeeze_mask_0, x = coreml_update_state_79)[name = string("v_cache_65_cast_fp16")]; + tensor k_cache_67_begin_0 = const()[name = string("k_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor k_cache_67_end_0 = const()[name = string("k_cache_67_end_0"), val = tensor([17, 1, 1500, 1024])]; + tensor k_cache_67_end_mask_0 = const()[name = string("k_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_67_squeeze_mask_0 = const()[name = string("k_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_67_cast_fp16 = slice_by_index(begin = k_cache_67_begin_0, end = k_cache_67_end_0, end_mask = k_cache_67_end_mask_0, squeeze_mask = k_cache_67_squeeze_mask_0, x = read_state_2)[name = string("k_cache_67_cast_fp16")]; + tensor v_cache_67_begin_0 = const()[name = string("v_cache_67_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor v_cache_67_end_0 = const()[name = string("v_cache_67_end_0"), val = tensor([17, 1, 1500, 1024])]; + tensor v_cache_67_end_mask_0 = const()[name = string("v_cache_67_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_67_squeeze_mask_0 = const()[name = string("v_cache_67_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_67_cast_fp16 = slice_by_index(begin = v_cache_67_begin_0, end = v_cache_67_end_0, end_mask = v_cache_67_end_mask_0, squeeze_mask = v_cache_67_squeeze_mask_0, x = read_state_3)[name = string("v_cache_67_cast_fp16")]; + int32 var_3524 = const()[name = string("op_3524"), val = int32(-1)]; + tensor var_3542_axes_0 = const()[name = string("op_3542_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580920704)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580922816)))]; + fp16 var_3530_to_fp16 = const()[name = string("op_3530_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3542_cast_fp16 = layer_norm(axes = var_3542_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_291_cast_fp16)[name = string("op_3542_cast_fp16")]; + tensor var_3553_to_fp16 = const()[name = string("op_3553_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580924928)))]; + tensor var_3554_to_fp16 = const()[name = string("op_3554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583022144)))]; + tensor linear_128_cast_fp16 = linear(bias = var_3554_to_fp16, weight = var_3553_to_fp16, x = var_3542_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_3557_to_fp16 = const()[name = string("op_3557_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583024256)))]; + tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3557_to_fp16, x = var_3542_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor var_3561_to_fp16 = const()[name = string("op_3561_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(585121472)))]; + tensor var_3562_to_fp16 = const()[name = string("op_3562_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587218688)))]; + tensor linear_130_cast_fp16 = linear(bias = var_3562_to_fp16, weight = var_3561_to_fp16, x = var_3542_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_3564_shape_cast_fp16 = shape(x = linear_128_cast_fp16)[name = string("op_3564_shape_cast_fp16")]; + int32 gather_194_axis_0 = const()[name = string("gather_194_axis_0"), val = int32(0)]; + int32 gather_194_batch_dims_0 = const()[name = string("gather_194_batch_dims_0"), val = int32(0)]; + bool gather_194_validate_indices_0 = const()[name = string("gather_194_validate_indices_0"), val = bool(false)]; + string var_3564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_194_to_uint16 = const()[name = string("select_194_to_uint16"), val = uint16(1)]; + tensor var_3564_shape_cast_fp16_to_uint16 = cast(dtype = var_3564_shape_cast_fp16_to_uint16_dtype_0, x = var_3564_shape_cast_fp16)[name = string("cast_262")]; + uint16 gather_194_cast_uint16 = gather(axis = gather_194_axis_0, batch_dims = gather_194_batch_dims_0, indices = select_194_to_uint16, validate_indices = gather_194_validate_indices_0, x = var_3564_shape_cast_fp16_to_uint16)[name = string("gather_194_cast_uint16")]; + string gather_194_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_194_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_194_cast_uint16_to_int32 = cast(dtype = gather_194_cast_uint16_to_int32_dtype_0, x = gather_194_cast_uint16)[name = string("cast_261")]; + int32 end_step_35 = add(x = offset, y = gather_194_cast_uint16_to_int32)[name = string("end_step_35")]; + tensor expand_dims_256 = const()[name = string("expand_dims_256"), val = tensor([0])]; + tensor expand_dims_258 = const()[name = string("expand_dims_258"), val = tensor([0])]; + tensor expand_dims_259_axes_0 = const()[name = string("expand_dims_259_axes_0"), val = tensor([0])]; + tensor expand_dims_259 = expand_dims(axes = expand_dims_259_axes_0, x = end_step_35)[name = string("expand_dims_259")]; + tensor concat_356_values0_0 = const()[name = string("concat_356_values0_0"), val = tensor([16])]; + int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; + bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; + tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (concat_356_values0_0, expand_dims_256, expand_dims_1, expand_dims_258))[name = string("concat_356")]; + tensor concat_357_values0_0 = const()[name = string("concat_357_values0_0"), val = tensor([0])]; + tensor concat_357_values1_0 = const()[name = string("concat_357_values1_0"), val = tensor([0])]; + tensor concat_357_values3_0 = const()[name = string("concat_357_values3_0"), val = tensor([0])]; + int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; + bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; + tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (concat_357_values0_0, concat_357_values1_0, expand_dims_259, concat_357_values3_0))[name = string("concat_357")]; + tensor k_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = k_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = k_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_17_stride_0, update = linear_129_cast_fp16, x = coreml_update_state_78)[name = string("k_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_17_cast_fp16, input = k_cache1)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = k_cache1)[name = string("coreml_update_state_80")]; + tensor v_cache1_internal_tensor_assign_17_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_17_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_356, begin_mask = v_cache1_internal_tensor_assign_17_begin_mask_0, end = concat_357, end_mask = v_cache1_internal_tensor_assign_17_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_17_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_17_stride_0, update = linear_130_cast_fp16, x = coreml_update_state_79)[name = string("v_cache1_internal_tensor_assign_17_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_17_cast_fp16, input = v_cache1)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = v_cache1)[name = string("coreml_update_state_81")]; + int32 concat_362_values0_0 = const()[name = string("concat_362_values0_0"), val = int32(1)]; + int32 concat_362_values2_0 = const()[name = string("concat_362_values2_0"), val = int32(1024)]; + int32 concat_362_axis_0 = const()[name = string("concat_362_axis_0"), val = int32(0)]; + bool concat_362_interleave_0 = const()[name = string("concat_362_interleave_0"), val = bool(false)]; + tensor concat_362 = concat(axis = concat_362_axis_0, interleave = concat_362_interleave_0, values = (concat_362_values0_0, end_step_35, concat_362_values2_0))[name = string("concat_362")]; + tensor var_3580_begin_0 = const()[name = string("op_3580_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3580_end_mask_0 = const()[name = string("op_3580_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3580_cast_fp16 = slice_by_index(begin = var_3580_begin_0, end = concat_362, end_mask = var_3580_end_mask_0, x = k_cache_65_cast_fp16)[name = string("op_3580_cast_fp16")]; + tensor var_3583_begin_0 = const()[name = string("op_3583_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3583_end_mask_0 = const()[name = string("op_3583_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3583_cast_fp16 = slice_by_index(begin = var_3583_begin_0, end = concat_362, end_mask = var_3583_end_mask_0, x = v_cache_65_cast_fp16)[name = string("op_3583_cast_fp16")]; + tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 16, 64])]; + tensor var_3593_cast_fp16 = reshape(shape = concat_364x, x = linear_128_cast_fp16)[name = string("op_3593_cast_fp16")]; + tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_131_cast_fp16 = mul(x = var_3593_cast_fp16, y = const_184_to_fp16)[name = string("q_131_cast_fp16")]; + tensor concat_365x = const()[name = string("concat_365x"), val = tensor([1, -1, 16, 64])]; + tensor var_3600_cast_fp16 = reshape(shape = concat_365x, x = var_3580_cast_fp16)[name = string("op_3600_cast_fp16")]; + tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_165_cast_fp16 = mul(x = var_3600_cast_fp16, y = const_185_to_fp16)[name = string("k_165_cast_fp16")]; + tensor concat_366x = const()[name = string("concat_366x"), val = tensor([1, -1, 16, 64])]; + tensor var_3607_cast_fp16 = reshape(shape = concat_366x, x = var_3583_cast_fp16)[name = string("op_3607_cast_fp16")]; + tensor var_3608 = const()[name = string("op_3608"), val = tensor([0, 2, 1, 3])]; + bool qk_97_transpose_x_0 = const()[name = string("qk_97_transpose_x_0"), val = bool(false)]; + bool qk_97_transpose_y_0 = const()[name = string("qk_97_transpose_y_0"), val = bool(false)]; + tensor transpose_257_perm_0 = const()[name = string("transpose_257_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_258_perm_0 = const()[name = string("transpose_258_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_258 = transpose(perm = transpose_258_perm_0, x = k_165_cast_fp16)[name = string("transpose_350")]; + tensor transpose_257 = transpose(perm = transpose_257_perm_0, x = q_131_cast_fp16)[name = string("transpose_351")]; + tensor qk_97_cast_fp16 = matmul(transpose_x = qk_97_transpose_x_0, transpose_y = qk_97_transpose_y_0, x = transpose_257, y = transpose_258)[name = string("qk_97_cast_fp16")]; + int32 concat_367_values1_0 = const()[name = string("concat_367_values1_0"), val = int32(448)]; + int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; + bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; + tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (gather_194_cast_uint16_to_int32, concat_367_values1_0))[name = string("concat_367")]; + tensor var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor([0, 0])]; + tensor var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor([false, true])]; + tensor var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = concat_367, end_mask = var_3611_end_mask_0, x = mask_to_fp16)[name = string("op_3611_cast_fp16")]; + int32 concat_368_values0_0 = const()[name = string("concat_368_values0_0"), val = int32(0)]; + int32 concat_368_axis_0 = const()[name = string("concat_368_axis_0"), val = int32(0)]; + bool concat_368_interleave_0 = const()[name = string("concat_368_interleave_0"), val = bool(false)]; + tensor concat_368 = concat(axis = concat_368_axis_0, interleave = concat_368_interleave_0, values = (concat_368_values0_0, gather_194_cast_uint16_to_int32))[name = string("concat_368")]; + tensor var_3612_begin_0 = const()[name = string("op_3612_begin_0"), val = tensor([0, 0])]; + tensor var_3612_end_mask_0 = const()[name = string("op_3612_end_mask_0"), val = tensor([true, false])]; + tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = concat_368, end_mask = var_3612_end_mask_0, x = var_3611_cast_fp16)[name = string("op_3612_cast_fp16")]; + tensor qk_99_cast_fp16 = add(x = qk_97_cast_fp16, y = var_3612_cast_fp16)[name = string("qk_99_cast_fp16")]; + tensor var_3615_cast_fp16 = softmax(axis = var_3524, x = qk_99_cast_fp16)[name = string("op_3615_cast_fp16")]; + bool var_3617_transpose_x_0 = const()[name = string("op_3617_transpose_x_0"), val = bool(false)]; + bool var_3617_transpose_y_0 = const()[name = string("op_3617_transpose_y_0"), val = bool(false)]; + tensor v_165_cast_fp16 = transpose(perm = var_3608, x = var_3607_cast_fp16)[name = string("transpose_352")]; + tensor var_3617_cast_fp16 = matmul(transpose_x = var_3617_transpose_x_0, transpose_y = var_3617_transpose_y_0, x = var_3615_cast_fp16, y = v_165_cast_fp16)[name = string("op_3617_cast_fp16")]; + tensor var_3618 = const()[name = string("op_3618"), val = tensor([0, 2, 1, 3])]; + tensor concat_369x = const()[name = string("concat_369x"), val = tensor([1, -1, 1024])]; + tensor var_3619_cast_fp16 = transpose(perm = var_3618, x = var_3617_cast_fp16)[name = string("transpose_349")]; + tensor x_295_cast_fp16 = reshape(shape = concat_369x, x = var_3619_cast_fp16)[name = string("x_295_cast_fp16")]; + tensor var_3623_to_fp16 = const()[name = string("op_3623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(587220800)))]; + tensor var_3624_to_fp16 = const()[name = string("op_3624_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589318016)))]; + tensor linear_131_cast_fp16 = linear(bias = var_3624_to_fp16, weight = var_3623_to_fp16, x = x_295_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_297_cast_fp16 = add(x = x_291_cast_fp16, y = linear_131_cast_fp16)[name = string("x_297_cast_fp16")]; + tensor var_3631_axes_0 = const()[name = string("op_3631_axes_0"), val = tensor([-1])]; + tensor blocks_16_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589320128)))]; + tensor blocks_16_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589322240)))]; + tensor var_3631_cast_fp16 = layer_norm(axes = var_3631_axes_0, beta = blocks_16_cross_attn_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_cross_attn_ln_weight_to_fp16, x = x_297_cast_fp16)[name = string("op_3631_cast_fp16")]; + tensor var_3640_to_fp16 = const()[name = string("op_3640_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589324352)))]; + tensor var_3641_to_fp16 = const()[name = string("op_3641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591421568)))]; + tensor linear_132_cast_fp16 = linear(bias = var_3641_to_fp16, weight = var_3640_to_fp16, x = var_3631_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor concat_370 = const()[name = string("concat_370"), val = tensor([0, 0, 0])]; + tensor concat_371 = const()[name = string("concat_371"), val = tensor([0, 1500, 0])]; + tensor k_167_internal_tensor_assign_1_stride_0 = const()[name = string("k_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_370, begin_mask = k_167_internal_tensor_assign_1_begin_mask_0, end = concat_371, end_mask = k_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_167_internal_tensor_assign_1_squeeze_mask_0, stride = k_167_internal_tensor_assign_1_stride_0, update = k_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("k_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_372 = const()[name = string("concat_372"), val = tensor([0, 0, 0])]; + tensor concat_373 = const()[name = string("concat_373"), val = tensor([0, 1500, 0])]; + tensor v_167_internal_tensor_assign_1_stride_0 = const()[name = string("v_167_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_167_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_167_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_167_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_167_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_372, begin_mask = v_167_internal_tensor_assign_1_begin_mask_0, end = concat_373, end_mask = v_167_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_167_internal_tensor_assign_1_squeeze_mask_0, stride = v_167_internal_tensor_assign_1_stride_0, update = v_cache_67_cast_fp16, x = k_7_to_fp16)[name = string("v_167_internal_tensor_assign_1_cast_fp16")]; + tensor concat_374x = const()[name = string("concat_374x"), val = tensor([1, -1, 16, 64])]; + tensor var_3661_cast_fp16 = reshape(shape = concat_374x, x = linear_132_cast_fp16)[name = string("op_3661_cast_fp16")]; + tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_135_cast_fp16 = mul(x = var_3661_cast_fp16, y = const_186_to_fp16)[name = string("q_135_cast_fp16")]; + tensor var_3667 = const()[name = string("op_3667"), val = tensor([1, 1500, 16, -1])]; + tensor var_3668_cast_fp16 = reshape(shape = var_3667, x = k_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3668_cast_fp16")]; + tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_169_cast_fp16 = mul(x = var_3668_cast_fp16, y = const_187_to_fp16)[name = string("k_169_cast_fp16")]; + tensor var_3674 = const()[name = string("op_3674"), val = tensor([1, 1500, 16, -1])]; + tensor var_3675_cast_fp16 = reshape(shape = var_3674, x = v_167_internal_tensor_assign_1_cast_fp16)[name = string("op_3675_cast_fp16")]; + tensor var_3676 = const()[name = string("op_3676"), val = tensor([0, 2, 1, 3])]; + bool qk_101_transpose_x_0 = const()[name = string("qk_101_transpose_x_0"), val = bool(false)]; + bool qk_101_transpose_y_0 = const()[name = string("qk_101_transpose_y_0"), val = bool(false)]; + tensor transpose_259_perm_0 = const()[name = string("transpose_259_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_260_perm_0 = const()[name = string("transpose_260_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_260 = transpose(perm = transpose_260_perm_0, x = k_169_cast_fp16)[name = string("transpose_346")]; + tensor transpose_259 = transpose(perm = transpose_259_perm_0, x = q_135_cast_fp16)[name = string("transpose_347")]; + tensor qk_101_cast_fp16 = matmul(transpose_x = qk_101_transpose_x_0, transpose_y = qk_101_transpose_y_0, x = transpose_259, y = transpose_260)[name = string("qk_101_cast_fp16")]; + tensor var_3680_cast_fp16 = softmax(axis = var_3524, x = qk_101_cast_fp16)[name = string("op_3680_cast_fp16")]; + bool var_3682_transpose_x_0 = const()[name = string("op_3682_transpose_x_0"), val = bool(false)]; + bool var_3682_transpose_y_0 = const()[name = string("op_3682_transpose_y_0"), val = bool(false)]; + tensor v_169_cast_fp16 = transpose(perm = var_3676, x = var_3675_cast_fp16)[name = string("transpose_348")]; + tensor var_3682_cast_fp16 = matmul(transpose_x = var_3682_transpose_x_0, transpose_y = var_3682_transpose_y_0, x = var_3680_cast_fp16, y = v_169_cast_fp16)[name = string("op_3682_cast_fp16")]; + tensor var_3683 = const()[name = string("op_3683"), val = tensor([0, 2, 1, 3])]; + tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, -1, 1024])]; + tensor var_3684_cast_fp16 = transpose(perm = var_3683, x = var_3682_cast_fp16)[name = string("transpose_345")]; + tensor x_301_cast_fp16 = reshape(shape = concat_375x, x = var_3684_cast_fp16)[name = string("x_301_cast_fp16")]; + tensor var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591423680)))]; + tensor var_3689_to_fp16 = const()[name = string("op_3689_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593520896)))]; + tensor linear_133_cast_fp16 = linear(bias = var_3689_to_fp16, weight = var_3688_to_fp16, x = x_301_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor x_303_cast_fp16 = add(x = x_297_cast_fp16, y = linear_133_cast_fp16)[name = string("x_303_cast_fp16")]; + tensor var_3696_axes_0 = const()[name = string("op_3696_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593523008)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593525120)))]; + tensor var_3696_cast_fp16 = layer_norm(axes = var_3696_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_3530_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_303_cast_fp16)[name = string("op_3696_cast_fp16")]; + tensor var_3705_to_fp16 = const()[name = string("op_3705_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593527232)))]; + tensor var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601915904)))]; + tensor linear_134_cast_fp16 = linear(bias = var_3706_to_fp16, weight = var_3705_to_fp16, x = var_3696_cast_fp16)[name = string("linear_134_cast_fp16")]; + string x_307_mode_0 = const()[name = string("x_307_mode_0"), val = string("EXACT")]; + tensor x_307_cast_fp16 = gelu(mode = x_307_mode_0, x = linear_134_cast_fp16)[name = string("x_307_cast_fp16")]; + tensor var_3711_to_fp16 = const()[name = string("op_3711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(601924160)))]; + tensor var_3712_to_fp16 = const()[name = string("op_3712_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610312832)))]; + tensor linear_135_cast_fp16 = linear(bias = var_3712_to_fp16, weight = var_3711_to_fp16, x = x_307_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_309_cast_fp16 = add(x = x_303_cast_fp16, y = linear_135_cast_fp16)[name = string("x_309_cast_fp16")]; + tensor k_cache_69_begin_0 = const()[name = string("k_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_69_end_0 = const()[name = string("k_cache_69_end_0"), val = tensor([18, 1, 448, 1024])]; + tensor k_cache_69_end_mask_0 = const()[name = string("k_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_69_squeeze_mask_0 = const()[name = string("k_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_69_cast_fp16 = slice_by_index(begin = k_cache_69_begin_0, end = k_cache_69_end_0, end_mask = k_cache_69_end_mask_0, squeeze_mask = k_cache_69_squeeze_mask_0, x = coreml_update_state_80)[name = string("k_cache_69_cast_fp16")]; + tensor v_cache_69_begin_0 = const()[name = string("v_cache_69_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_69_end_0 = const()[name = string("v_cache_69_end_0"), val = tensor([18, 1, 448, 1024])]; + tensor v_cache_69_end_mask_0 = const()[name = string("v_cache_69_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_69_squeeze_mask_0 = const()[name = string("v_cache_69_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_69_cast_fp16 = slice_by_index(begin = v_cache_69_begin_0, end = v_cache_69_end_0, end_mask = v_cache_69_end_mask_0, squeeze_mask = v_cache_69_squeeze_mask_0, x = coreml_update_state_81)[name = string("v_cache_69_cast_fp16")]; + tensor k_cache_71_begin_0 = const()[name = string("k_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor k_cache_71_end_0 = const()[name = string("k_cache_71_end_0"), val = tensor([18, 1, 1500, 1024])]; + tensor k_cache_71_end_mask_0 = const()[name = string("k_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_71_squeeze_mask_0 = const()[name = string("k_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_71_cast_fp16 = slice_by_index(begin = k_cache_71_begin_0, end = k_cache_71_end_0, end_mask = k_cache_71_end_mask_0, squeeze_mask = k_cache_71_squeeze_mask_0, x = read_state_2)[name = string("k_cache_71_cast_fp16")]; + tensor v_cache_71_begin_0 = const()[name = string("v_cache_71_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor v_cache_71_end_0 = const()[name = string("v_cache_71_end_0"), val = tensor([18, 1, 1500, 1024])]; + tensor v_cache_71_end_mask_0 = const()[name = string("v_cache_71_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_71_squeeze_mask_0 = const()[name = string("v_cache_71_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_71_cast_fp16 = slice_by_index(begin = v_cache_71_begin_0, end = v_cache_71_end_0, end_mask = v_cache_71_end_mask_0, squeeze_mask = v_cache_71_squeeze_mask_0, x = read_state_3)[name = string("v_cache_71_cast_fp16")]; + int32 var_3735 = const()[name = string("op_3735"), val = int32(-1)]; + tensor var_3753_axes_0 = const()[name = string("op_3753_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610314944)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610317056)))]; + fp16 var_3741_to_fp16 = const()[name = string("op_3741_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3753_cast_fp16 = layer_norm(axes = var_3753_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_309_cast_fp16)[name = string("op_3753_cast_fp16")]; + tensor var_3764_to_fp16 = const()[name = string("op_3764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(610319168)))]; + tensor var_3765_to_fp16 = const()[name = string("op_3765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612416384)))]; + tensor linear_136_cast_fp16 = linear(bias = var_3765_to_fp16, weight = var_3764_to_fp16, x = var_3753_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor var_3768_to_fp16 = const()[name = string("op_3768_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(612418496)))]; + tensor linear_137_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3768_to_fp16, x = var_3753_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3772_to_fp16 = const()[name = string("op_3772_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614515712)))]; + tensor var_3773_to_fp16 = const()[name = string("op_3773_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616612928)))]; + tensor linear_138_cast_fp16 = linear(bias = var_3773_to_fp16, weight = var_3772_to_fp16, x = var_3753_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_3775_shape_cast_fp16 = shape(x = linear_136_cast_fp16)[name = string("op_3775_shape_cast_fp16")]; + int32 gather_206_axis_0 = const()[name = string("gather_206_axis_0"), val = int32(0)]; + int32 gather_206_batch_dims_0 = const()[name = string("gather_206_batch_dims_0"), val = int32(0)]; + bool gather_206_validate_indices_0 = const()[name = string("gather_206_validate_indices_0"), val = bool(false)]; + string var_3775_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3775_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_206_to_uint16 = const()[name = string("select_206_to_uint16"), val = uint16(1)]; + tensor var_3775_shape_cast_fp16_to_uint16 = cast(dtype = var_3775_shape_cast_fp16_to_uint16_dtype_0, x = var_3775_shape_cast_fp16)[name = string("cast_260")]; + uint16 gather_206_cast_uint16 = gather(axis = gather_206_axis_0, batch_dims = gather_206_batch_dims_0, indices = select_206_to_uint16, validate_indices = gather_206_validate_indices_0, x = var_3775_shape_cast_fp16_to_uint16)[name = string("gather_206_cast_uint16")]; + string gather_206_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_206_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_206_cast_uint16_to_int32 = cast(dtype = gather_206_cast_uint16_to_int32_dtype_0, x = gather_206_cast_uint16)[name = string("cast_259")]; + int32 end_step_37 = add(x = offset, y = gather_206_cast_uint16_to_int32)[name = string("end_step_37")]; + tensor expand_dims_272 = const()[name = string("expand_dims_272"), val = tensor([0])]; + tensor expand_dims_274 = const()[name = string("expand_dims_274"), val = tensor([0])]; + tensor expand_dims_275_axes_0 = const()[name = string("expand_dims_275_axes_0"), val = tensor([0])]; + tensor expand_dims_275 = expand_dims(axes = expand_dims_275_axes_0, x = end_step_37)[name = string("expand_dims_275")]; + tensor concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = tensor([17])]; + int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; + bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; + tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, expand_dims_272, expand_dims_1, expand_dims_274))[name = string("concat_378")]; + tensor concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = tensor([0])]; + tensor concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = tensor([0])]; + tensor concat_379_values3_0 = const()[name = string("concat_379_values3_0"), val = tensor([0])]; + int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; + bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; + tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, expand_dims_275, concat_379_values3_0))[name = string("concat_379")]; + tensor k_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = k_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = k_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_18_stride_0, update = linear_137_cast_fp16, x = coreml_update_state_80)[name = string("k_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_18_cast_fp16, input = k_cache1)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = k_cache1)[name = string("coreml_update_state_82")]; + tensor v_cache1_internal_tensor_assign_18_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_18_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_378, begin_mask = v_cache1_internal_tensor_assign_18_begin_mask_0, end = concat_379, end_mask = v_cache1_internal_tensor_assign_18_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_18_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_18_stride_0, update = linear_138_cast_fp16, x = coreml_update_state_81)[name = string("v_cache1_internal_tensor_assign_18_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_18_cast_fp16, input = v_cache1)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = v_cache1)[name = string("coreml_update_state_83")]; + int32 concat_384_values0_0 = const()[name = string("concat_384_values0_0"), val = int32(1)]; + int32 concat_384_values2_0 = const()[name = string("concat_384_values2_0"), val = int32(1024)]; + int32 concat_384_axis_0 = const()[name = string("concat_384_axis_0"), val = int32(0)]; + bool concat_384_interleave_0 = const()[name = string("concat_384_interleave_0"), val = bool(false)]; + tensor concat_384 = concat(axis = concat_384_axis_0, interleave = concat_384_interleave_0, values = (concat_384_values0_0, end_step_37, concat_384_values2_0))[name = string("concat_384")]; + tensor var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = concat_384, end_mask = var_3791_end_mask_0, x = k_cache_69_cast_fp16)[name = string("op_3791_cast_fp16")]; + tensor var_3794_begin_0 = const()[name = string("op_3794_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3794_end_mask_0 = const()[name = string("op_3794_end_mask_0"), val = tensor([true, false, true])]; + tensor var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = concat_384, end_mask = var_3794_end_mask_0, x = v_cache_69_cast_fp16)[name = string("op_3794_cast_fp16")]; + tensor concat_386x = const()[name = string("concat_386x"), val = tensor([1, -1, 16, 64])]; + tensor var_3804_cast_fp16 = reshape(shape = concat_386x, x = linear_136_cast_fp16)[name = string("op_3804_cast_fp16")]; + tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_139_cast_fp16 = mul(x = var_3804_cast_fp16, y = const_188_to_fp16)[name = string("q_139_cast_fp16")]; + tensor concat_387x = const()[name = string("concat_387x"), val = tensor([1, -1, 16, 64])]; + tensor var_3811_cast_fp16 = reshape(shape = concat_387x, x = var_3791_cast_fp16)[name = string("op_3811_cast_fp16")]; + tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_175_cast_fp16 = mul(x = var_3811_cast_fp16, y = const_189_to_fp16)[name = string("k_175_cast_fp16")]; + tensor concat_388x = const()[name = string("concat_388x"), val = tensor([1, -1, 16, 64])]; + tensor var_3818_cast_fp16 = reshape(shape = concat_388x, x = var_3794_cast_fp16)[name = string("op_3818_cast_fp16")]; + tensor var_3819 = const()[name = string("op_3819"), val = tensor([0, 2, 1, 3])]; + bool qk_103_transpose_x_0 = const()[name = string("qk_103_transpose_x_0"), val = bool(false)]; + bool qk_103_transpose_y_0 = const()[name = string("qk_103_transpose_y_0"), val = bool(false)]; + tensor transpose_261_perm_0 = const()[name = string("transpose_261_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_262_perm_0 = const()[name = string("transpose_262_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_262 = transpose(perm = transpose_262_perm_0, x = k_175_cast_fp16)[name = string("transpose_342")]; + tensor transpose_261 = transpose(perm = transpose_261_perm_0, x = q_139_cast_fp16)[name = string("transpose_343")]; + tensor qk_103_cast_fp16 = matmul(transpose_x = qk_103_transpose_x_0, transpose_y = qk_103_transpose_y_0, x = transpose_261, y = transpose_262)[name = string("qk_103_cast_fp16")]; + int32 concat_389_values1_0 = const()[name = string("concat_389_values1_0"), val = int32(448)]; + int32 concat_389_axis_0 = const()[name = string("concat_389_axis_0"), val = int32(0)]; + bool concat_389_interleave_0 = const()[name = string("concat_389_interleave_0"), val = bool(false)]; + tensor concat_389 = concat(axis = concat_389_axis_0, interleave = concat_389_interleave_0, values = (gather_206_cast_uint16_to_int32, concat_389_values1_0))[name = string("concat_389")]; + tensor var_3822_begin_0 = const()[name = string("op_3822_begin_0"), val = tensor([0, 0])]; + tensor var_3822_end_mask_0 = const()[name = string("op_3822_end_mask_0"), val = tensor([false, true])]; + tensor var_3822_cast_fp16 = slice_by_index(begin = var_3822_begin_0, end = concat_389, end_mask = var_3822_end_mask_0, x = mask_to_fp16)[name = string("op_3822_cast_fp16")]; + int32 concat_390_values0_0 = const()[name = string("concat_390_values0_0"), val = int32(0)]; + int32 concat_390_axis_0 = const()[name = string("concat_390_axis_0"), val = int32(0)]; + bool concat_390_interleave_0 = const()[name = string("concat_390_interleave_0"), val = bool(false)]; + tensor concat_390 = concat(axis = concat_390_axis_0, interleave = concat_390_interleave_0, values = (concat_390_values0_0, gather_206_cast_uint16_to_int32))[name = string("concat_390")]; + tensor var_3823_begin_0 = const()[name = string("op_3823_begin_0"), val = tensor([0, 0])]; + tensor var_3823_end_mask_0 = const()[name = string("op_3823_end_mask_0"), val = tensor([true, false])]; + tensor var_3823_cast_fp16 = slice_by_index(begin = var_3823_begin_0, end = concat_390, end_mask = var_3823_end_mask_0, x = var_3822_cast_fp16)[name = string("op_3823_cast_fp16")]; + tensor qk_105_cast_fp16 = add(x = qk_103_cast_fp16, y = var_3823_cast_fp16)[name = string("qk_105_cast_fp16")]; + tensor var_3826_cast_fp16 = softmax(axis = var_3735, x = qk_105_cast_fp16)[name = string("op_3826_cast_fp16")]; + bool var_3828_transpose_x_0 = const()[name = string("op_3828_transpose_x_0"), val = bool(false)]; + bool var_3828_transpose_y_0 = const()[name = string("op_3828_transpose_y_0"), val = bool(false)]; + tensor v_175_cast_fp16 = transpose(perm = var_3819, x = var_3818_cast_fp16)[name = string("transpose_344")]; + tensor var_3828_cast_fp16 = matmul(transpose_x = var_3828_transpose_x_0, transpose_y = var_3828_transpose_y_0, x = var_3826_cast_fp16, y = v_175_cast_fp16)[name = string("op_3828_cast_fp16")]; + tensor var_3829 = const()[name = string("op_3829"), val = tensor([0, 2, 1, 3])]; + tensor concat_391x = const()[name = string("concat_391x"), val = tensor([1, -1, 1024])]; + tensor var_3830_cast_fp16 = transpose(perm = var_3829, x = var_3828_cast_fp16)[name = string("transpose_341")]; + tensor x_313_cast_fp16 = reshape(shape = concat_391x, x = var_3830_cast_fp16)[name = string("x_313_cast_fp16")]; + tensor var_3834_to_fp16 = const()[name = string("op_3834_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616615040)))]; + tensor var_3835_to_fp16 = const()[name = string("op_3835_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618712256)))]; + tensor linear_139_cast_fp16 = linear(bias = var_3835_to_fp16, weight = var_3834_to_fp16, x = x_313_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor x_315_cast_fp16 = add(x = x_309_cast_fp16, y = linear_139_cast_fp16)[name = string("x_315_cast_fp16")]; + tensor var_3842_axes_0 = const()[name = string("op_3842_axes_0"), val = tensor([-1])]; + tensor blocks_17_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618714368)))]; + tensor blocks_17_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618716480)))]; + tensor var_3842_cast_fp16 = layer_norm(axes = var_3842_axes_0, beta = blocks_17_cross_attn_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_cross_attn_ln_weight_to_fp16, x = x_315_cast_fp16)[name = string("op_3842_cast_fp16")]; + tensor var_3851_to_fp16 = const()[name = string("op_3851_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(618718592)))]; + tensor var_3852_to_fp16 = const()[name = string("op_3852_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620815808)))]; + tensor linear_140_cast_fp16 = linear(bias = var_3852_to_fp16, weight = var_3851_to_fp16, x = var_3842_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor concat_392 = const()[name = string("concat_392"), val = tensor([0, 0, 0])]; + tensor concat_393 = const()[name = string("concat_393"), val = tensor([0, 1500, 0])]; + tensor k_177_internal_tensor_assign_1_stride_0 = const()[name = string("k_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_392, begin_mask = k_177_internal_tensor_assign_1_begin_mask_0, end = concat_393, end_mask = k_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_177_internal_tensor_assign_1_squeeze_mask_0, stride = k_177_internal_tensor_assign_1_stride_0, update = k_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("k_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_394 = const()[name = string("concat_394"), val = tensor([0, 0, 0])]; + tensor concat_395 = const()[name = string("concat_395"), val = tensor([0, 1500, 0])]; + tensor v_177_internal_tensor_assign_1_stride_0 = const()[name = string("v_177_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_177_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_177_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_177_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_177_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_394, begin_mask = v_177_internal_tensor_assign_1_begin_mask_0, end = concat_395, end_mask = v_177_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_177_internal_tensor_assign_1_squeeze_mask_0, stride = v_177_internal_tensor_assign_1_stride_0, update = v_cache_71_cast_fp16, x = k_7_to_fp16)[name = string("v_177_internal_tensor_assign_1_cast_fp16")]; + tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, -1, 16, 64])]; + tensor var_3872_cast_fp16 = reshape(shape = concat_396x, x = linear_140_cast_fp16)[name = string("op_3872_cast_fp16")]; + tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_143_cast_fp16 = mul(x = var_3872_cast_fp16, y = const_190_to_fp16)[name = string("q_143_cast_fp16")]; + tensor var_3878 = const()[name = string("op_3878"), val = tensor([1, 1500, 16, -1])]; + tensor var_3879_cast_fp16 = reshape(shape = var_3878, x = k_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3879_cast_fp16")]; + tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_179_cast_fp16 = mul(x = var_3879_cast_fp16, y = const_191_to_fp16)[name = string("k_179_cast_fp16")]; + tensor var_3885 = const()[name = string("op_3885"), val = tensor([1, 1500, 16, -1])]; + tensor var_3886_cast_fp16 = reshape(shape = var_3885, x = v_177_internal_tensor_assign_1_cast_fp16)[name = string("op_3886_cast_fp16")]; + tensor var_3887 = const()[name = string("op_3887"), val = tensor([0, 2, 1, 3])]; + bool qk_107_transpose_x_0 = const()[name = string("qk_107_transpose_x_0"), val = bool(false)]; + bool qk_107_transpose_y_0 = const()[name = string("qk_107_transpose_y_0"), val = bool(false)]; + tensor transpose_263_perm_0 = const()[name = string("transpose_263_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_264_perm_0 = const()[name = string("transpose_264_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_264 = transpose(perm = transpose_264_perm_0, x = k_179_cast_fp16)[name = string("transpose_338")]; + tensor transpose_263 = transpose(perm = transpose_263_perm_0, x = q_143_cast_fp16)[name = string("transpose_339")]; + tensor qk_107_cast_fp16 = matmul(transpose_x = qk_107_transpose_x_0, transpose_y = qk_107_transpose_y_0, x = transpose_263, y = transpose_264)[name = string("qk_107_cast_fp16")]; + tensor var_3891_cast_fp16 = softmax(axis = var_3735, x = qk_107_cast_fp16)[name = string("op_3891_cast_fp16")]; + bool var_3893_transpose_x_0 = const()[name = string("op_3893_transpose_x_0"), val = bool(false)]; + bool var_3893_transpose_y_0 = const()[name = string("op_3893_transpose_y_0"), val = bool(false)]; + tensor v_179_cast_fp16 = transpose(perm = var_3887, x = var_3886_cast_fp16)[name = string("transpose_340")]; + tensor var_3893_cast_fp16 = matmul(transpose_x = var_3893_transpose_x_0, transpose_y = var_3893_transpose_y_0, x = var_3891_cast_fp16, y = v_179_cast_fp16)[name = string("op_3893_cast_fp16")]; + tensor var_3894 = const()[name = string("op_3894"), val = tensor([0, 2, 1, 3])]; + tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, -1, 1024])]; + tensor var_3895_cast_fp16 = transpose(perm = var_3894, x = var_3893_cast_fp16)[name = string("transpose_337")]; + tensor x_319_cast_fp16 = reshape(shape = concat_397x, x = var_3895_cast_fp16)[name = string("x_319_cast_fp16")]; + tensor var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(620817920)))]; + tensor var_3900_to_fp16 = const()[name = string("op_3900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622915136)))]; + tensor linear_141_cast_fp16 = linear(bias = var_3900_to_fp16, weight = var_3899_to_fp16, x = x_319_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_321_cast_fp16 = add(x = x_315_cast_fp16, y = linear_141_cast_fp16)[name = string("x_321_cast_fp16")]; + tensor var_3907_axes_0 = const()[name = string("op_3907_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622917248)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622919360)))]; + tensor var_3907_cast_fp16 = layer_norm(axes = var_3907_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_3741_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_321_cast_fp16)[name = string("op_3907_cast_fp16")]; + tensor var_3916_to_fp16 = const()[name = string("op_3916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(622921472)))]; + tensor var_3917_to_fp16 = const()[name = string("op_3917_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631310144)))]; + tensor linear_142_cast_fp16 = linear(bias = var_3917_to_fp16, weight = var_3916_to_fp16, x = var_3907_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_325_mode_0 = const()[name = string("x_325_mode_0"), val = string("EXACT")]; + tensor x_325_cast_fp16 = gelu(mode = x_325_mode_0, x = linear_142_cast_fp16)[name = string("x_325_cast_fp16")]; + tensor var_3922_to_fp16 = const()[name = string("op_3922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(631318400)))]; + tensor var_3923_to_fp16 = const()[name = string("op_3923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639707072)))]; + tensor linear_143_cast_fp16 = linear(bias = var_3923_to_fp16, weight = var_3922_to_fp16, x = x_325_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_327_cast_fp16 = add(x = x_321_cast_fp16, y = linear_143_cast_fp16)[name = string("x_327_cast_fp16")]; + tensor k_cache_73_begin_0 = const()[name = string("k_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_73_end_0 = const()[name = string("k_cache_73_end_0"), val = tensor([19, 1, 448, 1024])]; + tensor k_cache_73_end_mask_0 = const()[name = string("k_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_73_squeeze_mask_0 = const()[name = string("k_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_73_cast_fp16 = slice_by_index(begin = k_cache_73_begin_0, end = k_cache_73_end_0, end_mask = k_cache_73_end_mask_0, squeeze_mask = k_cache_73_squeeze_mask_0, x = coreml_update_state_82)[name = string("k_cache_73_cast_fp16")]; + tensor v_cache_73_begin_0 = const()[name = string("v_cache_73_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_73_end_0 = const()[name = string("v_cache_73_end_0"), val = tensor([19, 1, 448, 1024])]; + tensor v_cache_73_end_mask_0 = const()[name = string("v_cache_73_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_73_squeeze_mask_0 = const()[name = string("v_cache_73_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_73_cast_fp16 = slice_by_index(begin = v_cache_73_begin_0, end = v_cache_73_end_0, end_mask = v_cache_73_end_mask_0, squeeze_mask = v_cache_73_squeeze_mask_0, x = coreml_update_state_83)[name = string("v_cache_73_cast_fp16")]; + tensor k_cache_75_begin_0 = const()[name = string("k_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor k_cache_75_end_0 = const()[name = string("k_cache_75_end_0"), val = tensor([19, 1, 1500, 1024])]; + tensor k_cache_75_end_mask_0 = const()[name = string("k_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_75_squeeze_mask_0 = const()[name = string("k_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_75_cast_fp16 = slice_by_index(begin = k_cache_75_begin_0, end = k_cache_75_end_0, end_mask = k_cache_75_end_mask_0, squeeze_mask = k_cache_75_squeeze_mask_0, x = read_state_2)[name = string("k_cache_75_cast_fp16")]; + tensor v_cache_75_begin_0 = const()[name = string("v_cache_75_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor v_cache_75_end_0 = const()[name = string("v_cache_75_end_0"), val = tensor([19, 1, 1500, 1024])]; + tensor v_cache_75_end_mask_0 = const()[name = string("v_cache_75_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_75_squeeze_mask_0 = const()[name = string("v_cache_75_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_75_cast_fp16 = slice_by_index(begin = v_cache_75_begin_0, end = v_cache_75_end_0, end_mask = v_cache_75_end_mask_0, squeeze_mask = v_cache_75_squeeze_mask_0, x = read_state_3)[name = string("v_cache_75_cast_fp16")]; + int32 var_3946 = const()[name = string("op_3946"), val = int32(-1)]; + tensor var_3964_axes_0 = const()[name = string("op_3964_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639709184)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639711296)))]; + fp16 var_3952_to_fp16 = const()[name = string("op_3952_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3964_cast_fp16 = layer_norm(axes = var_3964_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_327_cast_fp16)[name = string("op_3964_cast_fp16")]; + tensor var_3975_to_fp16 = const()[name = string("op_3975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(639713408)))]; + tensor var_3976_to_fp16 = const()[name = string("op_3976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641810624)))]; + tensor linear_144_cast_fp16 = linear(bias = var_3976_to_fp16, weight = var_3975_to_fp16, x = var_3964_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_3979_to_fp16 = const()[name = string("op_3979_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641812736)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_3979_to_fp16, x = var_3964_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor var_3983_to_fp16 = const()[name = string("op_3983_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(643909952)))]; + tensor var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646007168)))]; + tensor linear_146_cast_fp16 = linear(bias = var_3984_to_fp16, weight = var_3983_to_fp16, x = var_3964_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor var_3986_shape_cast_fp16 = shape(x = linear_144_cast_fp16)[name = string("op_3986_shape_cast_fp16")]; + int32 gather_218_axis_0 = const()[name = string("gather_218_axis_0"), val = int32(0)]; + int32 gather_218_batch_dims_0 = const()[name = string("gather_218_batch_dims_0"), val = int32(0)]; + bool gather_218_validate_indices_0 = const()[name = string("gather_218_validate_indices_0"), val = bool(false)]; + string var_3986_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3986_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_218_to_uint16 = const()[name = string("select_218_to_uint16"), val = uint16(1)]; + tensor var_3986_shape_cast_fp16_to_uint16 = cast(dtype = var_3986_shape_cast_fp16_to_uint16_dtype_0, x = var_3986_shape_cast_fp16)[name = string("cast_258")]; + uint16 gather_218_cast_uint16 = gather(axis = gather_218_axis_0, batch_dims = gather_218_batch_dims_0, indices = select_218_to_uint16, validate_indices = gather_218_validate_indices_0, x = var_3986_shape_cast_fp16_to_uint16)[name = string("gather_218_cast_uint16")]; + string gather_218_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_218_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_218_cast_uint16_to_int32 = cast(dtype = gather_218_cast_uint16_to_int32_dtype_0, x = gather_218_cast_uint16)[name = string("cast_257")]; + int32 end_step_39 = add(x = offset, y = gather_218_cast_uint16_to_int32)[name = string("end_step_39")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; + tensor expand_dims_290 = const()[name = string("expand_dims_290"), val = tensor([0])]; + tensor expand_dims_291_axes_0 = const()[name = string("expand_dims_291_axes_0"), val = tensor([0])]; + tensor expand_dims_291 = expand_dims(axes = expand_dims_291_axes_0, x = end_step_39)[name = string("expand_dims_291")]; + tensor concat_400_values0_0 = const()[name = string("concat_400_values0_0"), val = tensor([18])]; + int32 concat_400_axis_0 = const()[name = string("concat_400_axis_0"), val = int32(0)]; + bool concat_400_interleave_0 = const()[name = string("concat_400_interleave_0"), val = bool(false)]; + tensor concat_400 = concat(axis = concat_400_axis_0, interleave = concat_400_interleave_0, values = (concat_400_values0_0, expand_dims_288, expand_dims_1, expand_dims_290))[name = string("concat_400")]; + tensor concat_401_values0_0 = const()[name = string("concat_401_values0_0"), val = tensor([0])]; + tensor concat_401_values1_0 = const()[name = string("concat_401_values1_0"), val = tensor([0])]; + tensor concat_401_values3_0 = const()[name = string("concat_401_values3_0"), val = tensor([0])]; + int32 concat_401_axis_0 = const()[name = string("concat_401_axis_0"), val = int32(0)]; + bool concat_401_interleave_0 = const()[name = string("concat_401_interleave_0"), val = bool(false)]; + tensor concat_401 = concat(axis = concat_401_axis_0, interleave = concat_401_interleave_0, values = (concat_401_values0_0, concat_401_values1_0, expand_dims_291, concat_401_values3_0))[name = string("concat_401")]; + tensor k_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = k_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = k_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_19_stride_0, update = linear_145_cast_fp16, x = coreml_update_state_82)[name = string("k_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_19_cast_fp16, input = k_cache1)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = k_cache1)[name = string("coreml_update_state_84")]; + tensor v_cache1_internal_tensor_assign_19_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_19_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_400, begin_mask = v_cache1_internal_tensor_assign_19_begin_mask_0, end = concat_401, end_mask = v_cache1_internal_tensor_assign_19_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_19_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_19_stride_0, update = linear_146_cast_fp16, x = coreml_update_state_83)[name = string("v_cache1_internal_tensor_assign_19_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_19_cast_fp16, input = v_cache1)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = v_cache1)[name = string("coreml_update_state_85")]; + int32 concat_406_values0_0 = const()[name = string("concat_406_values0_0"), val = int32(1)]; + int32 concat_406_values2_0 = const()[name = string("concat_406_values2_0"), val = int32(1024)]; + int32 concat_406_axis_0 = const()[name = string("concat_406_axis_0"), val = int32(0)]; + bool concat_406_interleave_0 = const()[name = string("concat_406_interleave_0"), val = bool(false)]; + tensor concat_406 = concat(axis = concat_406_axis_0, interleave = concat_406_interleave_0, values = (concat_406_values0_0, end_step_39, concat_406_values2_0))[name = string("concat_406")]; + tensor var_4002_begin_0 = const()[name = string("op_4002_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4002_end_mask_0 = const()[name = string("op_4002_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4002_cast_fp16 = slice_by_index(begin = var_4002_begin_0, end = concat_406, end_mask = var_4002_end_mask_0, x = k_cache_73_cast_fp16)[name = string("op_4002_cast_fp16")]; + tensor var_4005_begin_0 = const()[name = string("op_4005_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4005_end_mask_0 = const()[name = string("op_4005_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4005_cast_fp16 = slice_by_index(begin = var_4005_begin_0, end = concat_406, end_mask = var_4005_end_mask_0, x = v_cache_73_cast_fp16)[name = string("op_4005_cast_fp16")]; + tensor concat_408x = const()[name = string("concat_408x"), val = tensor([1, -1, 16, 64])]; + tensor var_4015_cast_fp16 = reshape(shape = concat_408x, x = linear_144_cast_fp16)[name = string("op_4015_cast_fp16")]; + tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_147_cast_fp16 = mul(x = var_4015_cast_fp16, y = const_192_to_fp16)[name = string("q_147_cast_fp16")]; + tensor concat_409x = const()[name = string("concat_409x"), val = tensor([1, -1, 16, 64])]; + tensor var_4022_cast_fp16 = reshape(shape = concat_409x, x = var_4002_cast_fp16)[name = string("op_4022_cast_fp16")]; + tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_185_cast_fp16 = mul(x = var_4022_cast_fp16, y = const_193_to_fp16)[name = string("k_185_cast_fp16")]; + tensor concat_410x = const()[name = string("concat_410x"), val = tensor([1, -1, 16, 64])]; + tensor var_4029_cast_fp16 = reshape(shape = concat_410x, x = var_4005_cast_fp16)[name = string("op_4029_cast_fp16")]; + tensor var_4030 = const()[name = string("op_4030"), val = tensor([0, 2, 1, 3])]; + bool qk_109_transpose_x_0 = const()[name = string("qk_109_transpose_x_0"), val = bool(false)]; + bool qk_109_transpose_y_0 = const()[name = string("qk_109_transpose_y_0"), val = bool(false)]; + tensor transpose_265_perm_0 = const()[name = string("transpose_265_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_266_perm_0 = const()[name = string("transpose_266_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_266 = transpose(perm = transpose_266_perm_0, x = k_185_cast_fp16)[name = string("transpose_334")]; + tensor transpose_265 = transpose(perm = transpose_265_perm_0, x = q_147_cast_fp16)[name = string("transpose_335")]; + tensor qk_109_cast_fp16 = matmul(transpose_x = qk_109_transpose_x_0, transpose_y = qk_109_transpose_y_0, x = transpose_265, y = transpose_266)[name = string("qk_109_cast_fp16")]; + int32 concat_411_values1_0 = const()[name = string("concat_411_values1_0"), val = int32(448)]; + int32 concat_411_axis_0 = const()[name = string("concat_411_axis_0"), val = int32(0)]; + bool concat_411_interleave_0 = const()[name = string("concat_411_interleave_0"), val = bool(false)]; + tensor concat_411 = concat(axis = concat_411_axis_0, interleave = concat_411_interleave_0, values = (gather_218_cast_uint16_to_int32, concat_411_values1_0))[name = string("concat_411")]; + tensor var_4033_begin_0 = const()[name = string("op_4033_begin_0"), val = tensor([0, 0])]; + tensor var_4033_end_mask_0 = const()[name = string("op_4033_end_mask_0"), val = tensor([false, true])]; + tensor var_4033_cast_fp16 = slice_by_index(begin = var_4033_begin_0, end = concat_411, end_mask = var_4033_end_mask_0, x = mask_to_fp16)[name = string("op_4033_cast_fp16")]; + int32 concat_412_values0_0 = const()[name = string("concat_412_values0_0"), val = int32(0)]; + int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; + bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; + tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (concat_412_values0_0, gather_218_cast_uint16_to_int32))[name = string("concat_412")]; + tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([0, 0])]; + tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([true, false])]; + tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = concat_412, end_mask = var_4034_end_mask_0, x = var_4033_cast_fp16)[name = string("op_4034_cast_fp16")]; + tensor qk_111_cast_fp16 = add(x = qk_109_cast_fp16, y = var_4034_cast_fp16)[name = string("qk_111_cast_fp16")]; + tensor var_4037_cast_fp16 = softmax(axis = var_3946, x = qk_111_cast_fp16)[name = string("op_4037_cast_fp16")]; + bool var_4039_transpose_x_0 = const()[name = string("op_4039_transpose_x_0"), val = bool(false)]; + bool var_4039_transpose_y_0 = const()[name = string("op_4039_transpose_y_0"), val = bool(false)]; + tensor v_185_cast_fp16 = transpose(perm = var_4030, x = var_4029_cast_fp16)[name = string("transpose_336")]; + tensor var_4039_cast_fp16 = matmul(transpose_x = var_4039_transpose_x_0, transpose_y = var_4039_transpose_y_0, x = var_4037_cast_fp16, y = v_185_cast_fp16)[name = string("op_4039_cast_fp16")]; + tensor var_4040 = const()[name = string("op_4040"), val = tensor([0, 2, 1, 3])]; + tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, -1, 1024])]; + tensor var_4041_cast_fp16 = transpose(perm = var_4040, x = var_4039_cast_fp16)[name = string("transpose_333")]; + tensor x_331_cast_fp16 = reshape(shape = concat_413x, x = var_4041_cast_fp16)[name = string("x_331_cast_fp16")]; + tensor var_4045_to_fp16 = const()[name = string("op_4045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646009280)))]; + tensor var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648106496)))]; + tensor linear_147_cast_fp16 = linear(bias = var_4046_to_fp16, weight = var_4045_to_fp16, x = x_331_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor x_333_cast_fp16 = add(x = x_327_cast_fp16, y = linear_147_cast_fp16)[name = string("x_333_cast_fp16")]; + tensor var_4053_axes_0 = const()[name = string("op_4053_axes_0"), val = tensor([-1])]; + tensor blocks_18_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648108608)))]; + tensor blocks_18_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648110720)))]; + tensor var_4053_cast_fp16 = layer_norm(axes = var_4053_axes_0, beta = blocks_18_cross_attn_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_cross_attn_ln_weight_to_fp16, x = x_333_cast_fp16)[name = string("op_4053_cast_fp16")]; + tensor var_4062_to_fp16 = const()[name = string("op_4062_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(648112832)))]; + tensor var_4063_to_fp16 = const()[name = string("op_4063_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650210048)))]; + tensor linear_148_cast_fp16 = linear(bias = var_4063_to_fp16, weight = var_4062_to_fp16, x = var_4053_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor concat_414 = const()[name = string("concat_414"), val = tensor([0, 0, 0])]; + tensor concat_415 = const()[name = string("concat_415"), val = tensor([0, 1500, 0])]; + tensor k_187_internal_tensor_assign_1_stride_0 = const()[name = string("k_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_414, begin_mask = k_187_internal_tensor_assign_1_begin_mask_0, end = concat_415, end_mask = k_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_187_internal_tensor_assign_1_squeeze_mask_0, stride = k_187_internal_tensor_assign_1_stride_0, update = k_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("k_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_416 = const()[name = string("concat_416"), val = tensor([0, 0, 0])]; + tensor concat_417 = const()[name = string("concat_417"), val = tensor([0, 1500, 0])]; + tensor v_187_internal_tensor_assign_1_stride_0 = const()[name = string("v_187_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_187_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_187_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_187_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_187_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_416, begin_mask = v_187_internal_tensor_assign_1_begin_mask_0, end = concat_417, end_mask = v_187_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_187_internal_tensor_assign_1_squeeze_mask_0, stride = v_187_internal_tensor_assign_1_stride_0, update = v_cache_75_cast_fp16, x = k_7_to_fp16)[name = string("v_187_internal_tensor_assign_1_cast_fp16")]; + tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 16, 64])]; + tensor var_4083_cast_fp16 = reshape(shape = concat_418x, x = linear_148_cast_fp16)[name = string("op_4083_cast_fp16")]; + tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_151_cast_fp16 = mul(x = var_4083_cast_fp16, y = const_194_to_fp16)[name = string("q_151_cast_fp16")]; + tensor var_4089 = const()[name = string("op_4089"), val = tensor([1, 1500, 16, -1])]; + tensor var_4090_cast_fp16 = reshape(shape = var_4089, x = k_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4090_cast_fp16")]; + tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_189_cast_fp16 = mul(x = var_4090_cast_fp16, y = const_195_to_fp16)[name = string("k_189_cast_fp16")]; + tensor var_4096 = const()[name = string("op_4096"), val = tensor([1, 1500, 16, -1])]; + tensor var_4097_cast_fp16 = reshape(shape = var_4096, x = v_187_internal_tensor_assign_1_cast_fp16)[name = string("op_4097_cast_fp16")]; + tensor var_4098 = const()[name = string("op_4098"), val = tensor([0, 2, 1, 3])]; + bool qk_113_transpose_x_0 = const()[name = string("qk_113_transpose_x_0"), val = bool(false)]; + bool qk_113_transpose_y_0 = const()[name = string("qk_113_transpose_y_0"), val = bool(false)]; + tensor transpose_267_perm_0 = const()[name = string("transpose_267_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_268_perm_0 = const()[name = string("transpose_268_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_268 = transpose(perm = transpose_268_perm_0, x = k_189_cast_fp16)[name = string("transpose_330")]; + tensor transpose_267 = transpose(perm = transpose_267_perm_0, x = q_151_cast_fp16)[name = string("transpose_331")]; + tensor qk_113_cast_fp16 = matmul(transpose_x = qk_113_transpose_x_0, transpose_y = qk_113_transpose_y_0, x = transpose_267, y = transpose_268)[name = string("qk_113_cast_fp16")]; + tensor var_4102_cast_fp16 = softmax(axis = var_3946, x = qk_113_cast_fp16)[name = string("op_4102_cast_fp16")]; + bool var_4104_transpose_x_0 = const()[name = string("op_4104_transpose_x_0"), val = bool(false)]; + bool var_4104_transpose_y_0 = const()[name = string("op_4104_transpose_y_0"), val = bool(false)]; + tensor v_189_cast_fp16 = transpose(perm = var_4098, x = var_4097_cast_fp16)[name = string("transpose_332")]; + tensor var_4104_cast_fp16 = matmul(transpose_x = var_4104_transpose_x_0, transpose_y = var_4104_transpose_y_0, x = var_4102_cast_fp16, y = v_189_cast_fp16)[name = string("op_4104_cast_fp16")]; + tensor var_4105 = const()[name = string("op_4105"), val = tensor([0, 2, 1, 3])]; + tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 1024])]; + tensor var_4106_cast_fp16 = transpose(perm = var_4105, x = var_4104_cast_fp16)[name = string("transpose_329")]; + tensor x_337_cast_fp16 = reshape(shape = concat_419x, x = var_4106_cast_fp16)[name = string("x_337_cast_fp16")]; + tensor var_4110_to_fp16 = const()[name = string("op_4110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650212160)))]; + tensor var_4111_to_fp16 = const()[name = string("op_4111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652309376)))]; + tensor linear_149_cast_fp16 = linear(bias = var_4111_to_fp16, weight = var_4110_to_fp16, x = x_337_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor x_339_cast_fp16 = add(x = x_333_cast_fp16, y = linear_149_cast_fp16)[name = string("x_339_cast_fp16")]; + tensor var_4118_axes_0 = const()[name = string("op_4118_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652311488)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652313600)))]; + tensor var_4118_cast_fp16 = layer_norm(axes = var_4118_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_3952_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_339_cast_fp16)[name = string("op_4118_cast_fp16")]; + tensor var_4127_to_fp16 = const()[name = string("op_4127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(652315712)))]; + tensor var_4128_to_fp16 = const()[name = string("op_4128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660704384)))]; + tensor linear_150_cast_fp16 = linear(bias = var_4128_to_fp16, weight = var_4127_to_fp16, x = var_4118_cast_fp16)[name = string("linear_150_cast_fp16")]; + string x_343_mode_0 = const()[name = string("x_343_mode_0"), val = string("EXACT")]; + tensor x_343_cast_fp16 = gelu(mode = x_343_mode_0, x = linear_150_cast_fp16)[name = string("x_343_cast_fp16")]; + tensor var_4133_to_fp16 = const()[name = string("op_4133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660712640)))]; + tensor var_4134_to_fp16 = const()[name = string("op_4134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669101312)))]; + tensor linear_151_cast_fp16 = linear(bias = var_4134_to_fp16, weight = var_4133_to_fp16, x = x_343_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor x_345_cast_fp16 = add(x = x_339_cast_fp16, y = linear_151_cast_fp16)[name = string("x_345_cast_fp16")]; + tensor k_cache_77_begin_0 = const()[name = string("k_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_77_end_0 = const()[name = string("k_cache_77_end_0"), val = tensor([20, 1, 448, 1024])]; + tensor k_cache_77_end_mask_0 = const()[name = string("k_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_77_squeeze_mask_0 = const()[name = string("k_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_77_cast_fp16 = slice_by_index(begin = k_cache_77_begin_0, end = k_cache_77_end_0, end_mask = k_cache_77_end_mask_0, squeeze_mask = k_cache_77_squeeze_mask_0, x = coreml_update_state_84)[name = string("k_cache_77_cast_fp16")]; + tensor v_cache_77_begin_0 = const()[name = string("v_cache_77_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_77_end_0 = const()[name = string("v_cache_77_end_0"), val = tensor([20, 1, 448, 1024])]; + tensor v_cache_77_end_mask_0 = const()[name = string("v_cache_77_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_77_squeeze_mask_0 = const()[name = string("v_cache_77_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_77_cast_fp16 = slice_by_index(begin = v_cache_77_begin_0, end = v_cache_77_end_0, end_mask = v_cache_77_end_mask_0, squeeze_mask = v_cache_77_squeeze_mask_0, x = coreml_update_state_85)[name = string("v_cache_77_cast_fp16")]; + tensor k_cache_79_begin_0 = const()[name = string("k_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor k_cache_79_end_0 = const()[name = string("k_cache_79_end_0"), val = tensor([20, 1, 1500, 1024])]; + tensor k_cache_79_end_mask_0 = const()[name = string("k_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_79_squeeze_mask_0 = const()[name = string("k_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_79_cast_fp16 = slice_by_index(begin = k_cache_79_begin_0, end = k_cache_79_end_0, end_mask = k_cache_79_end_mask_0, squeeze_mask = k_cache_79_squeeze_mask_0, x = read_state_2)[name = string("k_cache_79_cast_fp16")]; + tensor v_cache_79_begin_0 = const()[name = string("v_cache_79_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor v_cache_79_end_0 = const()[name = string("v_cache_79_end_0"), val = tensor([20, 1, 1500, 1024])]; + tensor v_cache_79_end_mask_0 = const()[name = string("v_cache_79_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_79_squeeze_mask_0 = const()[name = string("v_cache_79_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_79_cast_fp16 = slice_by_index(begin = v_cache_79_begin_0, end = v_cache_79_end_0, end_mask = v_cache_79_end_mask_0, squeeze_mask = v_cache_79_squeeze_mask_0, x = read_state_3)[name = string("v_cache_79_cast_fp16")]; + int32 var_4157 = const()[name = string("op_4157"), val = int32(-1)]; + tensor var_4175_axes_0 = const()[name = string("op_4175_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669103424)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669105536)))]; + fp16 var_4163_to_fp16 = const()[name = string("op_4163_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4175_cast_fp16 = layer_norm(axes = var_4175_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_345_cast_fp16)[name = string("op_4175_cast_fp16")]; + tensor var_4186_to_fp16 = const()[name = string("op_4186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669107648)))]; + tensor var_4187_to_fp16 = const()[name = string("op_4187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671204864)))]; + tensor linear_152_cast_fp16 = linear(bias = var_4187_to_fp16, weight = var_4186_to_fp16, x = var_4175_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor var_4190_to_fp16 = const()[name = string("op_4190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(671206976)))]; + tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4190_to_fp16, x = var_4175_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor var_4194_to_fp16 = const()[name = string("op_4194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673304192)))]; + tensor var_4195_to_fp16 = const()[name = string("op_4195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675401408)))]; + tensor linear_154_cast_fp16 = linear(bias = var_4195_to_fp16, weight = var_4194_to_fp16, x = var_4175_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor var_4197_shape_cast_fp16 = shape(x = linear_152_cast_fp16)[name = string("op_4197_shape_cast_fp16")]; + int32 gather_230_axis_0 = const()[name = string("gather_230_axis_0"), val = int32(0)]; + int32 gather_230_batch_dims_0 = const()[name = string("gather_230_batch_dims_0"), val = int32(0)]; + bool gather_230_validate_indices_0 = const()[name = string("gather_230_validate_indices_0"), val = bool(false)]; + string var_4197_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4197_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_230_to_uint16 = const()[name = string("select_230_to_uint16"), val = uint16(1)]; + tensor var_4197_shape_cast_fp16_to_uint16 = cast(dtype = var_4197_shape_cast_fp16_to_uint16_dtype_0, x = var_4197_shape_cast_fp16)[name = string("cast_256")]; + uint16 gather_230_cast_uint16 = gather(axis = gather_230_axis_0, batch_dims = gather_230_batch_dims_0, indices = select_230_to_uint16, validate_indices = gather_230_validate_indices_0, x = var_4197_shape_cast_fp16_to_uint16)[name = string("gather_230_cast_uint16")]; + string gather_230_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_230_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_230_cast_uint16_to_int32 = cast(dtype = gather_230_cast_uint16_to_int32_dtype_0, x = gather_230_cast_uint16)[name = string("cast_255")]; + int32 end_step_41 = add(x = offset, y = gather_230_cast_uint16_to_int32)[name = string("end_step_41")]; + tensor expand_dims_304 = const()[name = string("expand_dims_304"), val = tensor([0])]; + tensor expand_dims_306 = const()[name = string("expand_dims_306"), val = tensor([0])]; + tensor expand_dims_307_axes_0 = const()[name = string("expand_dims_307_axes_0"), val = tensor([0])]; + tensor expand_dims_307 = expand_dims(axes = expand_dims_307_axes_0, x = end_step_41)[name = string("expand_dims_307")]; + tensor concat_422_values0_0 = const()[name = string("concat_422_values0_0"), val = tensor([19])]; + int32 concat_422_axis_0 = const()[name = string("concat_422_axis_0"), val = int32(0)]; + bool concat_422_interleave_0 = const()[name = string("concat_422_interleave_0"), val = bool(false)]; + tensor concat_422 = concat(axis = concat_422_axis_0, interleave = concat_422_interleave_0, values = (concat_422_values0_0, expand_dims_304, expand_dims_1, expand_dims_306))[name = string("concat_422")]; + tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([0])]; + tensor concat_423_values1_0 = const()[name = string("concat_423_values1_0"), val = tensor([0])]; + tensor concat_423_values3_0 = const()[name = string("concat_423_values3_0"), val = tensor([0])]; + int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; + bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; + tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, concat_423_values1_0, expand_dims_307, concat_423_values3_0))[name = string("concat_423")]; + tensor k_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = k_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = k_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_20_stride_0, update = linear_153_cast_fp16, x = coreml_update_state_84)[name = string("k_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_20_cast_fp16, input = k_cache1)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = k_cache1)[name = string("coreml_update_state_86")]; + tensor v_cache1_internal_tensor_assign_20_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_20_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_422, begin_mask = v_cache1_internal_tensor_assign_20_begin_mask_0, end = concat_423, end_mask = v_cache1_internal_tensor_assign_20_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_20_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_20_stride_0, update = linear_154_cast_fp16, x = coreml_update_state_85)[name = string("v_cache1_internal_tensor_assign_20_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_20_cast_fp16, input = v_cache1)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = v_cache1)[name = string("coreml_update_state_87")]; + int32 concat_428_values0_0 = const()[name = string("concat_428_values0_0"), val = int32(1)]; + int32 concat_428_values2_0 = const()[name = string("concat_428_values2_0"), val = int32(1024)]; + int32 concat_428_axis_0 = const()[name = string("concat_428_axis_0"), val = int32(0)]; + bool concat_428_interleave_0 = const()[name = string("concat_428_interleave_0"), val = bool(false)]; + tensor concat_428 = concat(axis = concat_428_axis_0, interleave = concat_428_interleave_0, values = (concat_428_values0_0, end_step_41, concat_428_values2_0))[name = string("concat_428")]; + tensor var_4213_begin_0 = const()[name = string("op_4213_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4213_end_mask_0 = const()[name = string("op_4213_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4213_cast_fp16 = slice_by_index(begin = var_4213_begin_0, end = concat_428, end_mask = var_4213_end_mask_0, x = k_cache_77_cast_fp16)[name = string("op_4213_cast_fp16")]; + tensor var_4216_begin_0 = const()[name = string("op_4216_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4216_end_mask_0 = const()[name = string("op_4216_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4216_cast_fp16 = slice_by_index(begin = var_4216_begin_0, end = concat_428, end_mask = var_4216_end_mask_0, x = v_cache_77_cast_fp16)[name = string("op_4216_cast_fp16")]; + tensor concat_430x = const()[name = string("concat_430x"), val = tensor([1, -1, 16, 64])]; + tensor var_4226_cast_fp16 = reshape(shape = concat_430x, x = linear_152_cast_fp16)[name = string("op_4226_cast_fp16")]; + tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_155_cast_fp16 = mul(x = var_4226_cast_fp16, y = const_196_to_fp16)[name = string("q_155_cast_fp16")]; + tensor concat_431x = const()[name = string("concat_431x"), val = tensor([1, -1, 16, 64])]; + tensor var_4233_cast_fp16 = reshape(shape = concat_431x, x = var_4213_cast_fp16)[name = string("op_4233_cast_fp16")]; + tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_195_cast_fp16 = mul(x = var_4233_cast_fp16, y = const_197_to_fp16)[name = string("k_195_cast_fp16")]; + tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, -1, 16, 64])]; + tensor var_4240_cast_fp16 = reshape(shape = concat_432x, x = var_4216_cast_fp16)[name = string("op_4240_cast_fp16")]; + tensor var_4241 = const()[name = string("op_4241"), val = tensor([0, 2, 1, 3])]; + bool qk_115_transpose_x_0 = const()[name = string("qk_115_transpose_x_0"), val = bool(false)]; + bool qk_115_transpose_y_0 = const()[name = string("qk_115_transpose_y_0"), val = bool(false)]; + tensor transpose_269_perm_0 = const()[name = string("transpose_269_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_270_perm_0 = const()[name = string("transpose_270_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_270 = transpose(perm = transpose_270_perm_0, x = k_195_cast_fp16)[name = string("transpose_326")]; + tensor transpose_269 = transpose(perm = transpose_269_perm_0, x = q_155_cast_fp16)[name = string("transpose_327")]; + tensor qk_115_cast_fp16 = matmul(transpose_x = qk_115_transpose_x_0, transpose_y = qk_115_transpose_y_0, x = transpose_269, y = transpose_270)[name = string("qk_115_cast_fp16")]; + int32 concat_433_values1_0 = const()[name = string("concat_433_values1_0"), val = int32(448)]; + int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; + bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; + tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_230_cast_uint16_to_int32, concat_433_values1_0))[name = string("concat_433")]; + tensor var_4244_begin_0 = const()[name = string("op_4244_begin_0"), val = tensor([0, 0])]; + tensor var_4244_end_mask_0 = const()[name = string("op_4244_end_mask_0"), val = tensor([false, true])]; + tensor var_4244_cast_fp16 = slice_by_index(begin = var_4244_begin_0, end = concat_433, end_mask = var_4244_end_mask_0, x = mask_to_fp16)[name = string("op_4244_cast_fp16")]; + int32 concat_434_values0_0 = const()[name = string("concat_434_values0_0"), val = int32(0)]; + int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; + bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; + tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (concat_434_values0_0, gather_230_cast_uint16_to_int32))[name = string("concat_434")]; + tensor var_4245_begin_0 = const()[name = string("op_4245_begin_0"), val = tensor([0, 0])]; + tensor var_4245_end_mask_0 = const()[name = string("op_4245_end_mask_0"), val = tensor([true, false])]; + tensor var_4245_cast_fp16 = slice_by_index(begin = var_4245_begin_0, end = concat_434, end_mask = var_4245_end_mask_0, x = var_4244_cast_fp16)[name = string("op_4245_cast_fp16")]; + tensor qk_117_cast_fp16 = add(x = qk_115_cast_fp16, y = var_4245_cast_fp16)[name = string("qk_117_cast_fp16")]; + tensor var_4248_cast_fp16 = softmax(axis = var_4157, x = qk_117_cast_fp16)[name = string("op_4248_cast_fp16")]; + bool var_4250_transpose_x_0 = const()[name = string("op_4250_transpose_x_0"), val = bool(false)]; + bool var_4250_transpose_y_0 = const()[name = string("op_4250_transpose_y_0"), val = bool(false)]; + tensor v_195_cast_fp16 = transpose(perm = var_4241, x = var_4240_cast_fp16)[name = string("transpose_328")]; + tensor var_4250_cast_fp16 = matmul(transpose_x = var_4250_transpose_x_0, transpose_y = var_4250_transpose_y_0, x = var_4248_cast_fp16, y = v_195_cast_fp16)[name = string("op_4250_cast_fp16")]; + tensor var_4251 = const()[name = string("op_4251"), val = tensor([0, 2, 1, 3])]; + tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, -1, 1024])]; + tensor var_4252_cast_fp16 = transpose(perm = var_4251, x = var_4250_cast_fp16)[name = string("transpose_325")]; + tensor x_349_cast_fp16 = reshape(shape = concat_435x, x = var_4252_cast_fp16)[name = string("x_349_cast_fp16")]; + tensor var_4256_to_fp16 = const()[name = string("op_4256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675403520)))]; + tensor var_4257_to_fp16 = const()[name = string("op_4257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677500736)))]; + tensor linear_155_cast_fp16 = linear(bias = var_4257_to_fp16, weight = var_4256_to_fp16, x = x_349_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor x_351_cast_fp16 = add(x = x_345_cast_fp16, y = linear_155_cast_fp16)[name = string("x_351_cast_fp16")]; + tensor var_4264_axes_0 = const()[name = string("op_4264_axes_0"), val = tensor([-1])]; + tensor blocks_19_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677502848)))]; + tensor blocks_19_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677504960)))]; + tensor var_4264_cast_fp16 = layer_norm(axes = var_4264_axes_0, beta = blocks_19_cross_attn_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_cross_attn_ln_weight_to_fp16, x = x_351_cast_fp16)[name = string("op_4264_cast_fp16")]; + tensor var_4273_to_fp16 = const()[name = string("op_4273_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(677507072)))]; + tensor var_4274_to_fp16 = const()[name = string("op_4274_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679604288)))]; + tensor linear_156_cast_fp16 = linear(bias = var_4274_to_fp16, weight = var_4273_to_fp16, x = var_4264_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_436 = const()[name = string("concat_436"), val = tensor([0, 0, 0])]; + tensor concat_437 = const()[name = string("concat_437"), val = tensor([0, 1500, 0])]; + tensor k_197_internal_tensor_assign_1_stride_0 = const()[name = string("k_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_436, begin_mask = k_197_internal_tensor_assign_1_begin_mask_0, end = concat_437, end_mask = k_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_197_internal_tensor_assign_1_squeeze_mask_0, stride = k_197_internal_tensor_assign_1_stride_0, update = k_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("k_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_438 = const()[name = string("concat_438"), val = tensor([0, 0, 0])]; + tensor concat_439 = const()[name = string("concat_439"), val = tensor([0, 1500, 0])]; + tensor v_197_internal_tensor_assign_1_stride_0 = const()[name = string("v_197_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_197_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_197_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_197_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_197_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_438, begin_mask = v_197_internal_tensor_assign_1_begin_mask_0, end = concat_439, end_mask = v_197_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_197_internal_tensor_assign_1_squeeze_mask_0, stride = v_197_internal_tensor_assign_1_stride_0, update = v_cache_79_cast_fp16, x = k_7_to_fp16)[name = string("v_197_internal_tensor_assign_1_cast_fp16")]; + tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 16, 64])]; + tensor var_4294_cast_fp16 = reshape(shape = concat_440x, x = linear_156_cast_fp16)[name = string("op_4294_cast_fp16")]; + tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_159_cast_fp16 = mul(x = var_4294_cast_fp16, y = const_198_to_fp16)[name = string("q_159_cast_fp16")]; + tensor var_4300 = const()[name = string("op_4300"), val = tensor([1, 1500, 16, -1])]; + tensor var_4301_cast_fp16 = reshape(shape = var_4300, x = k_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4301_cast_fp16")]; + tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_199_cast_fp16 = mul(x = var_4301_cast_fp16, y = const_199_to_fp16)[name = string("k_199_cast_fp16")]; + tensor var_4307 = const()[name = string("op_4307"), val = tensor([1, 1500, 16, -1])]; + tensor var_4308_cast_fp16 = reshape(shape = var_4307, x = v_197_internal_tensor_assign_1_cast_fp16)[name = string("op_4308_cast_fp16")]; + tensor var_4309 = const()[name = string("op_4309"), val = tensor([0, 2, 1, 3])]; + bool qk_119_transpose_x_0 = const()[name = string("qk_119_transpose_x_0"), val = bool(false)]; + bool qk_119_transpose_y_0 = const()[name = string("qk_119_transpose_y_0"), val = bool(false)]; + tensor transpose_271_perm_0 = const()[name = string("transpose_271_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_272_perm_0 = const()[name = string("transpose_272_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_272 = transpose(perm = transpose_272_perm_0, x = k_199_cast_fp16)[name = string("transpose_322")]; + tensor transpose_271 = transpose(perm = transpose_271_perm_0, x = q_159_cast_fp16)[name = string("transpose_323")]; + tensor qk_119_cast_fp16 = matmul(transpose_x = qk_119_transpose_x_0, transpose_y = qk_119_transpose_y_0, x = transpose_271, y = transpose_272)[name = string("qk_119_cast_fp16")]; + tensor var_4313_cast_fp16 = softmax(axis = var_4157, x = qk_119_cast_fp16)[name = string("op_4313_cast_fp16")]; + bool var_4315_transpose_x_0 = const()[name = string("op_4315_transpose_x_0"), val = bool(false)]; + bool var_4315_transpose_y_0 = const()[name = string("op_4315_transpose_y_0"), val = bool(false)]; + tensor v_199_cast_fp16 = transpose(perm = var_4309, x = var_4308_cast_fp16)[name = string("transpose_324")]; + tensor var_4315_cast_fp16 = matmul(transpose_x = var_4315_transpose_x_0, transpose_y = var_4315_transpose_y_0, x = var_4313_cast_fp16, y = v_199_cast_fp16)[name = string("op_4315_cast_fp16")]; + tensor var_4316 = const()[name = string("op_4316"), val = tensor([0, 2, 1, 3])]; + tensor concat_441x = const()[name = string("concat_441x"), val = tensor([1, -1, 1024])]; + tensor var_4317_cast_fp16 = transpose(perm = var_4316, x = var_4315_cast_fp16)[name = string("transpose_321")]; + tensor x_355_cast_fp16 = reshape(shape = concat_441x, x = var_4317_cast_fp16)[name = string("x_355_cast_fp16")]; + tensor var_4321_to_fp16 = const()[name = string("op_4321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(679606400)))]; + tensor var_4322_to_fp16 = const()[name = string("op_4322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681703616)))]; + tensor linear_157_cast_fp16 = linear(bias = var_4322_to_fp16, weight = var_4321_to_fp16, x = x_355_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor x_357_cast_fp16 = add(x = x_351_cast_fp16, y = linear_157_cast_fp16)[name = string("x_357_cast_fp16")]; + tensor var_4329_axes_0 = const()[name = string("op_4329_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681705728)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681707840)))]; + tensor var_4329_cast_fp16 = layer_norm(axes = var_4329_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_4163_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_357_cast_fp16)[name = string("op_4329_cast_fp16")]; + tensor var_4338_to_fp16 = const()[name = string("op_4338_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(681709952)))]; + tensor var_4339_to_fp16 = const()[name = string("op_4339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690098624)))]; + tensor linear_158_cast_fp16 = linear(bias = var_4339_to_fp16, weight = var_4338_to_fp16, x = var_4329_cast_fp16)[name = string("linear_158_cast_fp16")]; + string x_361_mode_0 = const()[name = string("x_361_mode_0"), val = string("EXACT")]; + tensor x_361_cast_fp16 = gelu(mode = x_361_mode_0, x = linear_158_cast_fp16)[name = string("x_361_cast_fp16")]; + tensor var_4344_to_fp16 = const()[name = string("op_4344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(690106880)))]; + tensor var_4345_to_fp16 = const()[name = string("op_4345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698495552)))]; + tensor linear_159_cast_fp16 = linear(bias = var_4345_to_fp16, weight = var_4344_to_fp16, x = x_361_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_159_cast_fp16)[name = string("x_363_cast_fp16")]; + tensor k_cache_81_begin_0 = const()[name = string("k_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_81_end_0 = const()[name = string("k_cache_81_end_0"), val = tensor([21, 1, 448, 1024])]; + tensor k_cache_81_end_mask_0 = const()[name = string("k_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_81_squeeze_mask_0 = const()[name = string("k_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_81_cast_fp16 = slice_by_index(begin = k_cache_81_begin_0, end = k_cache_81_end_0, end_mask = k_cache_81_end_mask_0, squeeze_mask = k_cache_81_squeeze_mask_0, x = coreml_update_state_86)[name = string("k_cache_81_cast_fp16")]; + tensor v_cache_81_begin_0 = const()[name = string("v_cache_81_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_81_end_0 = const()[name = string("v_cache_81_end_0"), val = tensor([21, 1, 448, 1024])]; + tensor v_cache_81_end_mask_0 = const()[name = string("v_cache_81_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_81_squeeze_mask_0 = const()[name = string("v_cache_81_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_81_cast_fp16 = slice_by_index(begin = v_cache_81_begin_0, end = v_cache_81_end_0, end_mask = v_cache_81_end_mask_0, squeeze_mask = v_cache_81_squeeze_mask_0, x = coreml_update_state_87)[name = string("v_cache_81_cast_fp16")]; + tensor k_cache_83_begin_0 = const()[name = string("k_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor k_cache_83_end_0 = const()[name = string("k_cache_83_end_0"), val = tensor([21, 1, 1500, 1024])]; + tensor k_cache_83_end_mask_0 = const()[name = string("k_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_83_squeeze_mask_0 = const()[name = string("k_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_83_cast_fp16 = slice_by_index(begin = k_cache_83_begin_0, end = k_cache_83_end_0, end_mask = k_cache_83_end_mask_0, squeeze_mask = k_cache_83_squeeze_mask_0, x = read_state_2)[name = string("k_cache_83_cast_fp16")]; + tensor v_cache_83_begin_0 = const()[name = string("v_cache_83_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor v_cache_83_end_0 = const()[name = string("v_cache_83_end_0"), val = tensor([21, 1, 1500, 1024])]; + tensor v_cache_83_end_mask_0 = const()[name = string("v_cache_83_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_83_squeeze_mask_0 = const()[name = string("v_cache_83_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_83_cast_fp16 = slice_by_index(begin = v_cache_83_begin_0, end = v_cache_83_end_0, end_mask = v_cache_83_end_mask_0, squeeze_mask = v_cache_83_squeeze_mask_0, x = read_state_3)[name = string("v_cache_83_cast_fp16")]; + int32 var_4368 = const()[name = string("op_4368"), val = int32(-1)]; + tensor var_4386_axes_0 = const()[name = string("op_4386_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698497664)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698499776)))]; + fp16 var_4374_to_fp16 = const()[name = string("op_4374_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4386_cast_fp16 = layer_norm(axes = var_4386_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_363_cast_fp16)[name = string("op_4386_cast_fp16")]; + tensor var_4397_to_fp16 = const()[name = string("op_4397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698501888)))]; + tensor var_4398_to_fp16 = const()[name = string("op_4398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700599104)))]; + tensor linear_160_cast_fp16 = linear(bias = var_4398_to_fp16, weight = var_4397_to_fp16, x = var_4386_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor var_4401_to_fp16 = const()[name = string("op_4401_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(700601216)))]; + tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4401_to_fp16, x = var_4386_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor var_4405_to_fp16 = const()[name = string("op_4405_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(702698432)))]; + tensor var_4406_to_fp16 = const()[name = string("op_4406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704795648)))]; + tensor linear_162_cast_fp16 = linear(bias = var_4406_to_fp16, weight = var_4405_to_fp16, x = var_4386_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor var_4408_shape_cast_fp16 = shape(x = linear_160_cast_fp16)[name = string("op_4408_shape_cast_fp16")]; + int32 gather_242_axis_0 = const()[name = string("gather_242_axis_0"), val = int32(0)]; + int32 gather_242_batch_dims_0 = const()[name = string("gather_242_batch_dims_0"), val = int32(0)]; + bool gather_242_validate_indices_0 = const()[name = string("gather_242_validate_indices_0"), val = bool(false)]; + string var_4408_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4408_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_242_to_uint16 = const()[name = string("select_242_to_uint16"), val = uint16(1)]; + tensor var_4408_shape_cast_fp16_to_uint16 = cast(dtype = var_4408_shape_cast_fp16_to_uint16_dtype_0, x = var_4408_shape_cast_fp16)[name = string("cast_254")]; + uint16 gather_242_cast_uint16 = gather(axis = gather_242_axis_0, batch_dims = gather_242_batch_dims_0, indices = select_242_to_uint16, validate_indices = gather_242_validate_indices_0, x = var_4408_shape_cast_fp16_to_uint16)[name = string("gather_242_cast_uint16")]; + string gather_242_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_242_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_242_cast_uint16_to_int32 = cast(dtype = gather_242_cast_uint16_to_int32_dtype_0, x = gather_242_cast_uint16)[name = string("cast_253")]; + int32 end_step_43 = add(x = offset, y = gather_242_cast_uint16_to_int32)[name = string("end_step_43")]; + tensor expand_dims_320 = const()[name = string("expand_dims_320"), val = tensor([0])]; + tensor expand_dims_322 = const()[name = string("expand_dims_322"), val = tensor([0])]; + tensor expand_dims_323_axes_0 = const()[name = string("expand_dims_323_axes_0"), val = tensor([0])]; + tensor expand_dims_323 = expand_dims(axes = expand_dims_323_axes_0, x = end_step_43)[name = string("expand_dims_323")]; + tensor concat_444_values0_0 = const()[name = string("concat_444_values0_0"), val = tensor([20])]; + int32 concat_444_axis_0 = const()[name = string("concat_444_axis_0"), val = int32(0)]; + bool concat_444_interleave_0 = const()[name = string("concat_444_interleave_0"), val = bool(false)]; + tensor concat_444 = concat(axis = concat_444_axis_0, interleave = concat_444_interleave_0, values = (concat_444_values0_0, expand_dims_320, expand_dims_1, expand_dims_322))[name = string("concat_444")]; + tensor concat_445_values0_0 = const()[name = string("concat_445_values0_0"), val = tensor([0])]; + tensor concat_445_values1_0 = const()[name = string("concat_445_values1_0"), val = tensor([0])]; + tensor concat_445_values3_0 = const()[name = string("concat_445_values3_0"), val = tensor([0])]; + int32 concat_445_axis_0 = const()[name = string("concat_445_axis_0"), val = int32(0)]; + bool concat_445_interleave_0 = const()[name = string("concat_445_interleave_0"), val = bool(false)]; + tensor concat_445 = concat(axis = concat_445_axis_0, interleave = concat_445_interleave_0, values = (concat_445_values0_0, concat_445_values1_0, expand_dims_323, concat_445_values3_0))[name = string("concat_445")]; + tensor k_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = k_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = k_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_21_stride_0, update = linear_161_cast_fp16, x = coreml_update_state_86)[name = string("k_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_21_cast_fp16, input = k_cache1)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = k_cache1)[name = string("coreml_update_state_88")]; + tensor v_cache1_internal_tensor_assign_21_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_21_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_444, begin_mask = v_cache1_internal_tensor_assign_21_begin_mask_0, end = concat_445, end_mask = v_cache1_internal_tensor_assign_21_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_21_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_21_stride_0, update = linear_162_cast_fp16, x = coreml_update_state_87)[name = string("v_cache1_internal_tensor_assign_21_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_21_cast_fp16, input = v_cache1)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = v_cache1)[name = string("coreml_update_state_89")]; + int32 concat_450_values0_0 = const()[name = string("concat_450_values0_0"), val = int32(1)]; + int32 concat_450_values2_0 = const()[name = string("concat_450_values2_0"), val = int32(1024)]; + int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; + bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; + tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (concat_450_values0_0, end_step_43, concat_450_values2_0))[name = string("concat_450")]; + tensor var_4424_begin_0 = const()[name = string("op_4424_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4424_end_mask_0 = const()[name = string("op_4424_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4424_cast_fp16 = slice_by_index(begin = var_4424_begin_0, end = concat_450, end_mask = var_4424_end_mask_0, x = k_cache_81_cast_fp16)[name = string("op_4424_cast_fp16")]; + tensor var_4427_begin_0 = const()[name = string("op_4427_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4427_end_mask_0 = const()[name = string("op_4427_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4427_cast_fp16 = slice_by_index(begin = var_4427_begin_0, end = concat_450, end_mask = var_4427_end_mask_0, x = v_cache_81_cast_fp16)[name = string("op_4427_cast_fp16")]; + tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, -1, 16, 64])]; + tensor var_4437_cast_fp16 = reshape(shape = concat_452x, x = linear_160_cast_fp16)[name = string("op_4437_cast_fp16")]; + tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_163_cast_fp16 = mul(x = var_4437_cast_fp16, y = const_200_to_fp16)[name = string("q_163_cast_fp16")]; + tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, -1, 16, 64])]; + tensor var_4444_cast_fp16 = reshape(shape = concat_453x, x = var_4424_cast_fp16)[name = string("op_4444_cast_fp16")]; + tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_205_cast_fp16 = mul(x = var_4444_cast_fp16, y = const_201_to_fp16)[name = string("k_205_cast_fp16")]; + tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, -1, 16, 64])]; + tensor var_4451_cast_fp16 = reshape(shape = concat_454x, x = var_4427_cast_fp16)[name = string("op_4451_cast_fp16")]; + tensor var_4452 = const()[name = string("op_4452"), val = tensor([0, 2, 1, 3])]; + bool qk_121_transpose_x_0 = const()[name = string("qk_121_transpose_x_0"), val = bool(false)]; + bool qk_121_transpose_y_0 = const()[name = string("qk_121_transpose_y_0"), val = bool(false)]; + tensor transpose_273_perm_0 = const()[name = string("transpose_273_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_274_perm_0 = const()[name = string("transpose_274_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_274 = transpose(perm = transpose_274_perm_0, x = k_205_cast_fp16)[name = string("transpose_318")]; + tensor transpose_273 = transpose(perm = transpose_273_perm_0, x = q_163_cast_fp16)[name = string("transpose_319")]; + tensor qk_121_cast_fp16 = matmul(transpose_x = qk_121_transpose_x_0, transpose_y = qk_121_transpose_y_0, x = transpose_273, y = transpose_274)[name = string("qk_121_cast_fp16")]; + int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(448)]; + int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; + bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; + tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_242_cast_uint16_to_int32, concat_455_values1_0))[name = string("concat_455")]; + tensor var_4455_begin_0 = const()[name = string("op_4455_begin_0"), val = tensor([0, 0])]; + tensor var_4455_end_mask_0 = const()[name = string("op_4455_end_mask_0"), val = tensor([false, true])]; + tensor var_4455_cast_fp16 = slice_by_index(begin = var_4455_begin_0, end = concat_455, end_mask = var_4455_end_mask_0, x = mask_to_fp16)[name = string("op_4455_cast_fp16")]; + int32 concat_456_values0_0 = const()[name = string("concat_456_values0_0"), val = int32(0)]; + int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; + bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; + tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (concat_456_values0_0, gather_242_cast_uint16_to_int32))[name = string("concat_456")]; + tensor var_4456_begin_0 = const()[name = string("op_4456_begin_0"), val = tensor([0, 0])]; + tensor var_4456_end_mask_0 = const()[name = string("op_4456_end_mask_0"), val = tensor([true, false])]; + tensor var_4456_cast_fp16 = slice_by_index(begin = var_4456_begin_0, end = concat_456, end_mask = var_4456_end_mask_0, x = var_4455_cast_fp16)[name = string("op_4456_cast_fp16")]; + tensor qk_123_cast_fp16 = add(x = qk_121_cast_fp16, y = var_4456_cast_fp16)[name = string("qk_123_cast_fp16")]; + tensor var_4459_cast_fp16 = softmax(axis = var_4368, x = qk_123_cast_fp16)[name = string("op_4459_cast_fp16")]; + bool var_4461_transpose_x_0 = const()[name = string("op_4461_transpose_x_0"), val = bool(false)]; + bool var_4461_transpose_y_0 = const()[name = string("op_4461_transpose_y_0"), val = bool(false)]; + tensor v_205_cast_fp16 = transpose(perm = var_4452, x = var_4451_cast_fp16)[name = string("transpose_320")]; + tensor var_4461_cast_fp16 = matmul(transpose_x = var_4461_transpose_x_0, transpose_y = var_4461_transpose_y_0, x = var_4459_cast_fp16, y = v_205_cast_fp16)[name = string("op_4461_cast_fp16")]; + tensor var_4462 = const()[name = string("op_4462"), val = tensor([0, 2, 1, 3])]; + tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 1024])]; + tensor var_4463_cast_fp16 = transpose(perm = var_4462, x = var_4461_cast_fp16)[name = string("transpose_317")]; + tensor x_367_cast_fp16 = reshape(shape = concat_457x, x = var_4463_cast_fp16)[name = string("x_367_cast_fp16")]; + tensor var_4467_to_fp16 = const()[name = string("op_4467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(704797760)))]; + tensor var_4468_to_fp16 = const()[name = string("op_4468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706894976)))]; + tensor linear_163_cast_fp16 = linear(bias = var_4468_to_fp16, weight = var_4467_to_fp16, x = x_367_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor x_369_cast_fp16 = add(x = x_363_cast_fp16, y = linear_163_cast_fp16)[name = string("x_369_cast_fp16")]; + tensor var_4475_axes_0 = const()[name = string("op_4475_axes_0"), val = tensor([-1])]; + tensor blocks_20_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706897088)))]; + tensor blocks_20_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706899200)))]; + tensor var_4475_cast_fp16 = layer_norm(axes = var_4475_axes_0, beta = blocks_20_cross_attn_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_cross_attn_ln_weight_to_fp16, x = x_369_cast_fp16)[name = string("op_4475_cast_fp16")]; + tensor var_4484_to_fp16 = const()[name = string("op_4484_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(706901312)))]; + tensor var_4485_to_fp16 = const()[name = string("op_4485_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(708998528)))]; + tensor linear_164_cast_fp16 = linear(bias = var_4485_to_fp16, weight = var_4484_to_fp16, x = var_4475_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor concat_458 = const()[name = string("concat_458"), val = tensor([0, 0, 0])]; + tensor concat_459 = const()[name = string("concat_459"), val = tensor([0, 1500, 0])]; + tensor k_207_internal_tensor_assign_1_stride_0 = const()[name = string("k_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_458, begin_mask = k_207_internal_tensor_assign_1_begin_mask_0, end = concat_459, end_mask = k_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_207_internal_tensor_assign_1_squeeze_mask_0, stride = k_207_internal_tensor_assign_1_stride_0, update = k_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("k_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_460 = const()[name = string("concat_460"), val = tensor([0, 0, 0])]; + tensor concat_461 = const()[name = string("concat_461"), val = tensor([0, 1500, 0])]; + tensor v_207_internal_tensor_assign_1_stride_0 = const()[name = string("v_207_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_207_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_207_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_207_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_207_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_460, begin_mask = v_207_internal_tensor_assign_1_begin_mask_0, end = concat_461, end_mask = v_207_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_207_internal_tensor_assign_1_squeeze_mask_0, stride = v_207_internal_tensor_assign_1_stride_0, update = v_cache_83_cast_fp16, x = k_7_to_fp16)[name = string("v_207_internal_tensor_assign_1_cast_fp16")]; + tensor concat_462x = const()[name = string("concat_462x"), val = tensor([1, -1, 16, 64])]; + tensor var_4505_cast_fp16 = reshape(shape = concat_462x, x = linear_164_cast_fp16)[name = string("op_4505_cast_fp16")]; + tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_167_cast_fp16 = mul(x = var_4505_cast_fp16, y = const_202_to_fp16)[name = string("q_167_cast_fp16")]; + tensor var_4511 = const()[name = string("op_4511"), val = tensor([1, 1500, 16, -1])]; + tensor var_4512_cast_fp16 = reshape(shape = var_4511, x = k_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4512_cast_fp16")]; + tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_209_cast_fp16 = mul(x = var_4512_cast_fp16, y = const_203_to_fp16)[name = string("k_209_cast_fp16")]; + tensor var_4518 = const()[name = string("op_4518"), val = tensor([1, 1500, 16, -1])]; + tensor var_4519_cast_fp16 = reshape(shape = var_4518, x = v_207_internal_tensor_assign_1_cast_fp16)[name = string("op_4519_cast_fp16")]; + tensor var_4520 = const()[name = string("op_4520"), val = tensor([0, 2, 1, 3])]; + bool qk_125_transpose_x_0 = const()[name = string("qk_125_transpose_x_0"), val = bool(false)]; + bool qk_125_transpose_y_0 = const()[name = string("qk_125_transpose_y_0"), val = bool(false)]; + tensor transpose_275_perm_0 = const()[name = string("transpose_275_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_276_perm_0 = const()[name = string("transpose_276_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_276 = transpose(perm = transpose_276_perm_0, x = k_209_cast_fp16)[name = string("transpose_314")]; + tensor transpose_275 = transpose(perm = transpose_275_perm_0, x = q_167_cast_fp16)[name = string("transpose_315")]; + tensor qk_125_cast_fp16 = matmul(transpose_x = qk_125_transpose_x_0, transpose_y = qk_125_transpose_y_0, x = transpose_275, y = transpose_276)[name = string("qk_125_cast_fp16")]; + tensor var_4524_cast_fp16 = softmax(axis = var_4368, x = qk_125_cast_fp16)[name = string("op_4524_cast_fp16")]; + bool var_4526_transpose_x_0 = const()[name = string("op_4526_transpose_x_0"), val = bool(false)]; + bool var_4526_transpose_y_0 = const()[name = string("op_4526_transpose_y_0"), val = bool(false)]; + tensor v_209_cast_fp16 = transpose(perm = var_4520, x = var_4519_cast_fp16)[name = string("transpose_316")]; + tensor var_4526_cast_fp16 = matmul(transpose_x = var_4526_transpose_x_0, transpose_y = var_4526_transpose_y_0, x = var_4524_cast_fp16, y = v_209_cast_fp16)[name = string("op_4526_cast_fp16")]; + tensor var_4527 = const()[name = string("op_4527"), val = tensor([0, 2, 1, 3])]; + tensor concat_463x = const()[name = string("concat_463x"), val = tensor([1, -1, 1024])]; + tensor var_4528_cast_fp16 = transpose(perm = var_4527, x = var_4526_cast_fp16)[name = string("transpose_313")]; + tensor x_373_cast_fp16 = reshape(shape = concat_463x, x = var_4528_cast_fp16)[name = string("x_373_cast_fp16")]; + tensor var_4532_to_fp16 = const()[name = string("op_4532_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(709000640)))]; + tensor var_4533_to_fp16 = const()[name = string("op_4533_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711097856)))]; + tensor linear_165_cast_fp16 = linear(bias = var_4533_to_fp16, weight = var_4532_to_fp16, x = x_373_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor x_375_cast_fp16 = add(x = x_369_cast_fp16, y = linear_165_cast_fp16)[name = string("x_375_cast_fp16")]; + tensor var_4540_axes_0 = const()[name = string("op_4540_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711099968)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711102080)))]; + tensor var_4540_cast_fp16 = layer_norm(axes = var_4540_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_4374_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_375_cast_fp16)[name = string("op_4540_cast_fp16")]; + tensor var_4549_to_fp16 = const()[name = string("op_4549_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711104192)))]; + tensor var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719492864)))]; + tensor linear_166_cast_fp16 = linear(bias = var_4550_to_fp16, weight = var_4549_to_fp16, x = var_4540_cast_fp16)[name = string("linear_166_cast_fp16")]; + string x_379_mode_0 = const()[name = string("x_379_mode_0"), val = string("EXACT")]; + tensor x_379_cast_fp16 = gelu(mode = x_379_mode_0, x = linear_166_cast_fp16)[name = string("x_379_cast_fp16")]; + tensor var_4555_to_fp16 = const()[name = string("op_4555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(719501120)))]; + tensor var_4556_to_fp16 = const()[name = string("op_4556_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727889792)))]; + tensor linear_167_cast_fp16 = linear(bias = var_4556_to_fp16, weight = var_4555_to_fp16, x = x_379_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor x_381_cast_fp16 = add(x = x_375_cast_fp16, y = linear_167_cast_fp16)[name = string("x_381_cast_fp16")]; + tensor k_cache_85_begin_0 = const()[name = string("k_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_85_end_0 = const()[name = string("k_cache_85_end_0"), val = tensor([22, 1, 448, 1024])]; + tensor k_cache_85_end_mask_0 = const()[name = string("k_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_85_squeeze_mask_0 = const()[name = string("k_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_85_cast_fp16 = slice_by_index(begin = k_cache_85_begin_0, end = k_cache_85_end_0, end_mask = k_cache_85_end_mask_0, squeeze_mask = k_cache_85_squeeze_mask_0, x = coreml_update_state_88)[name = string("k_cache_85_cast_fp16")]; + tensor v_cache_85_begin_0 = const()[name = string("v_cache_85_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_85_end_0 = const()[name = string("v_cache_85_end_0"), val = tensor([22, 1, 448, 1024])]; + tensor v_cache_85_end_mask_0 = const()[name = string("v_cache_85_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_85_squeeze_mask_0 = const()[name = string("v_cache_85_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_85_cast_fp16 = slice_by_index(begin = v_cache_85_begin_0, end = v_cache_85_end_0, end_mask = v_cache_85_end_mask_0, squeeze_mask = v_cache_85_squeeze_mask_0, x = coreml_update_state_89)[name = string("v_cache_85_cast_fp16")]; + tensor k_cache_87_begin_0 = const()[name = string("k_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor k_cache_87_end_0 = const()[name = string("k_cache_87_end_0"), val = tensor([22, 1, 1500, 1024])]; + tensor k_cache_87_end_mask_0 = const()[name = string("k_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_87_squeeze_mask_0 = const()[name = string("k_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_87_cast_fp16 = slice_by_index(begin = k_cache_87_begin_0, end = k_cache_87_end_0, end_mask = k_cache_87_end_mask_0, squeeze_mask = k_cache_87_squeeze_mask_0, x = read_state_2)[name = string("k_cache_87_cast_fp16")]; + tensor v_cache_87_begin_0 = const()[name = string("v_cache_87_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor v_cache_87_end_0 = const()[name = string("v_cache_87_end_0"), val = tensor([22, 1, 1500, 1024])]; + tensor v_cache_87_end_mask_0 = const()[name = string("v_cache_87_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_87_squeeze_mask_0 = const()[name = string("v_cache_87_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_87_cast_fp16 = slice_by_index(begin = v_cache_87_begin_0, end = v_cache_87_end_0, end_mask = v_cache_87_end_mask_0, squeeze_mask = v_cache_87_squeeze_mask_0, x = read_state_3)[name = string("v_cache_87_cast_fp16")]; + int32 var_4579 = const()[name = string("op_4579"), val = int32(-1)]; + tensor var_4597_axes_0 = const()[name = string("op_4597_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727891904)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727894016)))]; + fp16 var_4585_to_fp16 = const()[name = string("op_4585_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4597_cast_fp16 = layer_norm(axes = var_4597_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_381_cast_fp16)[name = string("op_4597_cast_fp16")]; + tensor var_4608_to_fp16 = const()[name = string("op_4608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(727896128)))]; + tensor var_4609_to_fp16 = const()[name = string("op_4609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729993344)))]; + tensor linear_168_cast_fp16 = linear(bias = var_4609_to_fp16, weight = var_4608_to_fp16, x = var_4597_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor var_4612_to_fp16 = const()[name = string("op_4612_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(729995456)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4612_to_fp16, x = var_4597_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor var_4616_to_fp16 = const()[name = string("op_4616_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732092672)))]; + tensor var_4617_to_fp16 = const()[name = string("op_4617_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734189888)))]; + tensor linear_170_cast_fp16 = linear(bias = var_4617_to_fp16, weight = var_4616_to_fp16, x = var_4597_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor var_4619_shape_cast_fp16 = shape(x = linear_168_cast_fp16)[name = string("op_4619_shape_cast_fp16")]; + int32 gather_254_axis_0 = const()[name = string("gather_254_axis_0"), val = int32(0)]; + int32 gather_254_batch_dims_0 = const()[name = string("gather_254_batch_dims_0"), val = int32(0)]; + bool gather_254_validate_indices_0 = const()[name = string("gather_254_validate_indices_0"), val = bool(false)]; + string var_4619_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4619_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_254_to_uint16 = const()[name = string("select_254_to_uint16"), val = uint16(1)]; + tensor var_4619_shape_cast_fp16_to_uint16 = cast(dtype = var_4619_shape_cast_fp16_to_uint16_dtype_0, x = var_4619_shape_cast_fp16)[name = string("cast_252")]; + uint16 gather_254_cast_uint16 = gather(axis = gather_254_axis_0, batch_dims = gather_254_batch_dims_0, indices = select_254_to_uint16, validate_indices = gather_254_validate_indices_0, x = var_4619_shape_cast_fp16_to_uint16)[name = string("gather_254_cast_uint16")]; + string gather_254_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_254_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_254_cast_uint16_to_int32 = cast(dtype = gather_254_cast_uint16_to_int32_dtype_0, x = gather_254_cast_uint16)[name = string("cast_251")]; + int32 end_step_45 = add(x = offset, y = gather_254_cast_uint16_to_int32)[name = string("end_step_45")]; + tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; + tensor expand_dims_338 = const()[name = string("expand_dims_338"), val = tensor([0])]; + tensor expand_dims_339_axes_0 = const()[name = string("expand_dims_339_axes_0"), val = tensor([0])]; + tensor expand_dims_339 = expand_dims(axes = expand_dims_339_axes_0, x = end_step_45)[name = string("expand_dims_339")]; + tensor concat_466_values0_0 = const()[name = string("concat_466_values0_0"), val = tensor([21])]; + int32 concat_466_axis_0 = const()[name = string("concat_466_axis_0"), val = int32(0)]; + bool concat_466_interleave_0 = const()[name = string("concat_466_interleave_0"), val = bool(false)]; + tensor concat_466 = concat(axis = concat_466_axis_0, interleave = concat_466_interleave_0, values = (concat_466_values0_0, expand_dims_336, expand_dims_1, expand_dims_338))[name = string("concat_466")]; + tensor concat_467_values0_0 = const()[name = string("concat_467_values0_0"), val = tensor([0])]; + tensor concat_467_values1_0 = const()[name = string("concat_467_values1_0"), val = tensor([0])]; + tensor concat_467_values3_0 = const()[name = string("concat_467_values3_0"), val = tensor([0])]; + int32 concat_467_axis_0 = const()[name = string("concat_467_axis_0"), val = int32(0)]; + bool concat_467_interleave_0 = const()[name = string("concat_467_interleave_0"), val = bool(false)]; + tensor concat_467 = concat(axis = concat_467_axis_0, interleave = concat_467_interleave_0, values = (concat_467_values0_0, concat_467_values1_0, expand_dims_339, concat_467_values3_0))[name = string("concat_467")]; + tensor k_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = k_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = k_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_22_stride_0, update = linear_169_cast_fp16, x = coreml_update_state_88)[name = string("k_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_22_cast_fp16, input = k_cache1)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = k_cache1)[name = string("coreml_update_state_90")]; + tensor v_cache1_internal_tensor_assign_22_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_22_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_466, begin_mask = v_cache1_internal_tensor_assign_22_begin_mask_0, end = concat_467, end_mask = v_cache1_internal_tensor_assign_22_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_22_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_22_stride_0, update = linear_170_cast_fp16, x = coreml_update_state_89)[name = string("v_cache1_internal_tensor_assign_22_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_22_cast_fp16, input = v_cache1)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = v_cache1)[name = string("coreml_update_state_91")]; + int32 concat_472_values0_0 = const()[name = string("concat_472_values0_0"), val = int32(1)]; + int32 concat_472_values2_0 = const()[name = string("concat_472_values2_0"), val = int32(1024)]; + int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; + bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; + tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (concat_472_values0_0, end_step_45, concat_472_values2_0))[name = string("concat_472")]; + tensor var_4635_begin_0 = const()[name = string("op_4635_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4635_end_mask_0 = const()[name = string("op_4635_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4635_cast_fp16 = slice_by_index(begin = var_4635_begin_0, end = concat_472, end_mask = var_4635_end_mask_0, x = k_cache_85_cast_fp16)[name = string("op_4635_cast_fp16")]; + tensor var_4638_begin_0 = const()[name = string("op_4638_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4638_end_mask_0 = const()[name = string("op_4638_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4638_cast_fp16 = slice_by_index(begin = var_4638_begin_0, end = concat_472, end_mask = var_4638_end_mask_0, x = v_cache_85_cast_fp16)[name = string("op_4638_cast_fp16")]; + tensor concat_474x = const()[name = string("concat_474x"), val = tensor([1, -1, 16, 64])]; + tensor var_4648_cast_fp16 = reshape(shape = concat_474x, x = linear_168_cast_fp16)[name = string("op_4648_cast_fp16")]; + tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_171_cast_fp16 = mul(x = var_4648_cast_fp16, y = const_204_to_fp16)[name = string("q_171_cast_fp16")]; + tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 16, 64])]; + tensor var_4655_cast_fp16 = reshape(shape = concat_475x, x = var_4635_cast_fp16)[name = string("op_4655_cast_fp16")]; + tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_215_cast_fp16 = mul(x = var_4655_cast_fp16, y = const_205_to_fp16)[name = string("k_215_cast_fp16")]; + tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 16, 64])]; + tensor var_4662_cast_fp16 = reshape(shape = concat_476x, x = var_4638_cast_fp16)[name = string("op_4662_cast_fp16")]; + tensor var_4663 = const()[name = string("op_4663"), val = tensor([0, 2, 1, 3])]; + bool qk_127_transpose_x_0 = const()[name = string("qk_127_transpose_x_0"), val = bool(false)]; + bool qk_127_transpose_y_0 = const()[name = string("qk_127_transpose_y_0"), val = bool(false)]; + tensor transpose_277_perm_0 = const()[name = string("transpose_277_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_278_perm_0 = const()[name = string("transpose_278_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_278 = transpose(perm = transpose_278_perm_0, x = k_215_cast_fp16)[name = string("transpose_310")]; + tensor transpose_277 = transpose(perm = transpose_277_perm_0, x = q_171_cast_fp16)[name = string("transpose_311")]; + tensor qk_127_cast_fp16 = matmul(transpose_x = qk_127_transpose_x_0, transpose_y = qk_127_transpose_y_0, x = transpose_277, y = transpose_278)[name = string("qk_127_cast_fp16")]; + int32 concat_477_values1_0 = const()[name = string("concat_477_values1_0"), val = int32(448)]; + int32 concat_477_axis_0 = const()[name = string("concat_477_axis_0"), val = int32(0)]; + bool concat_477_interleave_0 = const()[name = string("concat_477_interleave_0"), val = bool(false)]; + tensor concat_477 = concat(axis = concat_477_axis_0, interleave = concat_477_interleave_0, values = (gather_254_cast_uint16_to_int32, concat_477_values1_0))[name = string("concat_477")]; + tensor var_4666_begin_0 = const()[name = string("op_4666_begin_0"), val = tensor([0, 0])]; + tensor var_4666_end_mask_0 = const()[name = string("op_4666_end_mask_0"), val = tensor([false, true])]; + tensor var_4666_cast_fp16 = slice_by_index(begin = var_4666_begin_0, end = concat_477, end_mask = var_4666_end_mask_0, x = mask_to_fp16)[name = string("op_4666_cast_fp16")]; + int32 concat_478_values0_0 = const()[name = string("concat_478_values0_0"), val = int32(0)]; + int32 concat_478_axis_0 = const()[name = string("concat_478_axis_0"), val = int32(0)]; + bool concat_478_interleave_0 = const()[name = string("concat_478_interleave_0"), val = bool(false)]; + tensor concat_478 = concat(axis = concat_478_axis_0, interleave = concat_478_interleave_0, values = (concat_478_values0_0, gather_254_cast_uint16_to_int32))[name = string("concat_478")]; + tensor var_4667_begin_0 = const()[name = string("op_4667_begin_0"), val = tensor([0, 0])]; + tensor var_4667_end_mask_0 = const()[name = string("op_4667_end_mask_0"), val = tensor([true, false])]; + tensor var_4667_cast_fp16 = slice_by_index(begin = var_4667_begin_0, end = concat_478, end_mask = var_4667_end_mask_0, x = var_4666_cast_fp16)[name = string("op_4667_cast_fp16")]; + tensor qk_129_cast_fp16 = add(x = qk_127_cast_fp16, y = var_4667_cast_fp16)[name = string("qk_129_cast_fp16")]; + tensor var_4670_cast_fp16 = softmax(axis = var_4579, x = qk_129_cast_fp16)[name = string("op_4670_cast_fp16")]; + bool var_4672_transpose_x_0 = const()[name = string("op_4672_transpose_x_0"), val = bool(false)]; + bool var_4672_transpose_y_0 = const()[name = string("op_4672_transpose_y_0"), val = bool(false)]; + tensor v_215_cast_fp16 = transpose(perm = var_4663, x = var_4662_cast_fp16)[name = string("transpose_312")]; + tensor var_4672_cast_fp16 = matmul(transpose_x = var_4672_transpose_x_0, transpose_y = var_4672_transpose_y_0, x = var_4670_cast_fp16, y = v_215_cast_fp16)[name = string("op_4672_cast_fp16")]; + tensor var_4673 = const()[name = string("op_4673"), val = tensor([0, 2, 1, 3])]; + tensor concat_479x = const()[name = string("concat_479x"), val = tensor([1, -1, 1024])]; + tensor var_4674_cast_fp16 = transpose(perm = var_4673, x = var_4672_cast_fp16)[name = string("transpose_309")]; + tensor x_385_cast_fp16 = reshape(shape = concat_479x, x = var_4674_cast_fp16)[name = string("x_385_cast_fp16")]; + tensor var_4678_to_fp16 = const()[name = string("op_4678_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(734192000)))]; + tensor var_4679_to_fp16 = const()[name = string("op_4679_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736289216)))]; + tensor linear_171_cast_fp16 = linear(bias = var_4679_to_fp16, weight = var_4678_to_fp16, x = x_385_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor x_387_cast_fp16 = add(x = x_381_cast_fp16, y = linear_171_cast_fp16)[name = string("x_387_cast_fp16")]; + tensor var_4686_axes_0 = const()[name = string("op_4686_axes_0"), val = tensor([-1])]; + tensor blocks_21_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736291328)))]; + tensor blocks_21_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736293440)))]; + tensor var_4686_cast_fp16 = layer_norm(axes = var_4686_axes_0, beta = blocks_21_cross_attn_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_cross_attn_ln_weight_to_fp16, x = x_387_cast_fp16)[name = string("op_4686_cast_fp16")]; + tensor var_4695_to_fp16 = const()[name = string("op_4695_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736295552)))]; + tensor var_4696_to_fp16 = const()[name = string("op_4696_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738392768)))]; + tensor linear_172_cast_fp16 = linear(bias = var_4696_to_fp16, weight = var_4695_to_fp16, x = var_4686_cast_fp16)[name = string("linear_172_cast_fp16")]; + tensor concat_480 = const()[name = string("concat_480"), val = tensor([0, 0, 0])]; + tensor concat_481 = const()[name = string("concat_481"), val = tensor([0, 1500, 0])]; + tensor k_217_internal_tensor_assign_1_stride_0 = const()[name = string("k_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_480, begin_mask = k_217_internal_tensor_assign_1_begin_mask_0, end = concat_481, end_mask = k_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_217_internal_tensor_assign_1_squeeze_mask_0, stride = k_217_internal_tensor_assign_1_stride_0, update = k_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("k_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_482 = const()[name = string("concat_482"), val = tensor([0, 0, 0])]; + tensor concat_483 = const()[name = string("concat_483"), val = tensor([0, 1500, 0])]; + tensor v_217_internal_tensor_assign_1_stride_0 = const()[name = string("v_217_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_217_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_217_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_217_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_217_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_482, begin_mask = v_217_internal_tensor_assign_1_begin_mask_0, end = concat_483, end_mask = v_217_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_217_internal_tensor_assign_1_squeeze_mask_0, stride = v_217_internal_tensor_assign_1_stride_0, update = v_cache_87_cast_fp16, x = k_7_to_fp16)[name = string("v_217_internal_tensor_assign_1_cast_fp16")]; + tensor concat_484x = const()[name = string("concat_484x"), val = tensor([1, -1, 16, 64])]; + tensor var_4716_cast_fp16 = reshape(shape = concat_484x, x = linear_172_cast_fp16)[name = string("op_4716_cast_fp16")]; + tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_175_cast_fp16 = mul(x = var_4716_cast_fp16, y = const_206_to_fp16)[name = string("q_175_cast_fp16")]; + tensor var_4722 = const()[name = string("op_4722"), val = tensor([1, 1500, 16, -1])]; + tensor var_4723_cast_fp16 = reshape(shape = var_4722, x = k_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4723_cast_fp16")]; + tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_219_cast_fp16 = mul(x = var_4723_cast_fp16, y = const_207_to_fp16)[name = string("k_219_cast_fp16")]; + tensor var_4729 = const()[name = string("op_4729"), val = tensor([1, 1500, 16, -1])]; + tensor var_4730_cast_fp16 = reshape(shape = var_4729, x = v_217_internal_tensor_assign_1_cast_fp16)[name = string("op_4730_cast_fp16")]; + tensor var_4731 = const()[name = string("op_4731"), val = tensor([0, 2, 1, 3])]; + bool qk_131_transpose_x_0 = const()[name = string("qk_131_transpose_x_0"), val = bool(false)]; + bool qk_131_transpose_y_0 = const()[name = string("qk_131_transpose_y_0"), val = bool(false)]; + tensor transpose_279_perm_0 = const()[name = string("transpose_279_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_280_perm_0 = const()[name = string("transpose_280_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_280 = transpose(perm = transpose_280_perm_0, x = k_219_cast_fp16)[name = string("transpose_306")]; + tensor transpose_279 = transpose(perm = transpose_279_perm_0, x = q_175_cast_fp16)[name = string("transpose_307")]; + tensor qk_131_cast_fp16 = matmul(transpose_x = qk_131_transpose_x_0, transpose_y = qk_131_transpose_y_0, x = transpose_279, y = transpose_280)[name = string("qk_131_cast_fp16")]; + tensor var_4735_cast_fp16 = softmax(axis = var_4579, x = qk_131_cast_fp16)[name = string("op_4735_cast_fp16")]; + bool var_4737_transpose_x_0 = const()[name = string("op_4737_transpose_x_0"), val = bool(false)]; + bool var_4737_transpose_y_0 = const()[name = string("op_4737_transpose_y_0"), val = bool(false)]; + tensor v_219_cast_fp16 = transpose(perm = var_4731, x = var_4730_cast_fp16)[name = string("transpose_308")]; + tensor var_4737_cast_fp16 = matmul(transpose_x = var_4737_transpose_x_0, transpose_y = var_4737_transpose_y_0, x = var_4735_cast_fp16, y = v_219_cast_fp16)[name = string("op_4737_cast_fp16")]; + tensor var_4738 = const()[name = string("op_4738"), val = tensor([0, 2, 1, 3])]; + tensor concat_485x = const()[name = string("concat_485x"), val = tensor([1, -1, 1024])]; + tensor var_4739_cast_fp16 = transpose(perm = var_4738, x = var_4737_cast_fp16)[name = string("transpose_305")]; + tensor x_391_cast_fp16 = reshape(shape = concat_485x, x = var_4739_cast_fp16)[name = string("x_391_cast_fp16")]; + tensor var_4743_to_fp16 = const()[name = string("op_4743_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(738394880)))]; + tensor var_4744_to_fp16 = const()[name = string("op_4744_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740492096)))]; + tensor linear_173_cast_fp16 = linear(bias = var_4744_to_fp16, weight = var_4743_to_fp16, x = x_391_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor x_393_cast_fp16 = add(x = x_387_cast_fp16, y = linear_173_cast_fp16)[name = string("x_393_cast_fp16")]; + tensor var_4751_axes_0 = const()[name = string("op_4751_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740494208)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740496320)))]; + tensor var_4751_cast_fp16 = layer_norm(axes = var_4751_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_4585_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_393_cast_fp16)[name = string("op_4751_cast_fp16")]; + tensor var_4760_to_fp16 = const()[name = string("op_4760_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(740498432)))]; + tensor var_4761_to_fp16 = const()[name = string("op_4761_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748887104)))]; + tensor linear_174_cast_fp16 = linear(bias = var_4761_to_fp16, weight = var_4760_to_fp16, x = var_4751_cast_fp16)[name = string("linear_174_cast_fp16")]; + string x_397_mode_0 = const()[name = string("x_397_mode_0"), val = string("EXACT")]; + tensor x_397_cast_fp16 = gelu(mode = x_397_mode_0, x = linear_174_cast_fp16)[name = string("x_397_cast_fp16")]; + tensor var_4766_to_fp16 = const()[name = string("op_4766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(748895360)))]; + tensor var_4767_to_fp16 = const()[name = string("op_4767_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757284032)))]; + tensor linear_175_cast_fp16 = linear(bias = var_4767_to_fp16, weight = var_4766_to_fp16, x = x_397_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor x_399_cast_fp16 = add(x = x_393_cast_fp16, y = linear_175_cast_fp16)[name = string("x_399_cast_fp16")]; + tensor k_cache_89_begin_0 = const()[name = string("k_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_89_end_0 = const()[name = string("k_cache_89_end_0"), val = tensor([23, 1, 448, 1024])]; + tensor k_cache_89_end_mask_0 = const()[name = string("k_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_89_squeeze_mask_0 = const()[name = string("k_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_89_cast_fp16 = slice_by_index(begin = k_cache_89_begin_0, end = k_cache_89_end_0, end_mask = k_cache_89_end_mask_0, squeeze_mask = k_cache_89_squeeze_mask_0, x = coreml_update_state_90)[name = string("k_cache_89_cast_fp16")]; + tensor v_cache_89_begin_0 = const()[name = string("v_cache_89_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_89_end_0 = const()[name = string("v_cache_89_end_0"), val = tensor([23, 1, 448, 1024])]; + tensor v_cache_89_end_mask_0 = const()[name = string("v_cache_89_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_89_squeeze_mask_0 = const()[name = string("v_cache_89_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_89_cast_fp16 = slice_by_index(begin = v_cache_89_begin_0, end = v_cache_89_end_0, end_mask = v_cache_89_end_mask_0, squeeze_mask = v_cache_89_squeeze_mask_0, x = coreml_update_state_91)[name = string("v_cache_89_cast_fp16")]; + tensor k_cache_91_begin_0 = const()[name = string("k_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor k_cache_91_end_0 = const()[name = string("k_cache_91_end_0"), val = tensor([23, 1, 1500, 1024])]; + tensor k_cache_91_end_mask_0 = const()[name = string("k_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_91_squeeze_mask_0 = const()[name = string("k_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_91_cast_fp16 = slice_by_index(begin = k_cache_91_begin_0, end = k_cache_91_end_0, end_mask = k_cache_91_end_mask_0, squeeze_mask = k_cache_91_squeeze_mask_0, x = read_state_2)[name = string("k_cache_91_cast_fp16")]; + tensor v_cache_91_begin_0 = const()[name = string("v_cache_91_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor v_cache_91_end_0 = const()[name = string("v_cache_91_end_0"), val = tensor([23, 1, 1500, 1024])]; + tensor v_cache_91_end_mask_0 = const()[name = string("v_cache_91_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_91_squeeze_mask_0 = const()[name = string("v_cache_91_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_91_cast_fp16 = slice_by_index(begin = v_cache_91_begin_0, end = v_cache_91_end_0, end_mask = v_cache_91_end_mask_0, squeeze_mask = v_cache_91_squeeze_mask_0, x = read_state_3)[name = string("v_cache_91_cast_fp16")]; + int32 var_4790 = const()[name = string("op_4790"), val = int32(-1)]; + tensor var_4808_axes_0 = const()[name = string("op_4808_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757286144)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757288256)))]; + fp16 var_4796_to_fp16 = const()[name = string("op_4796_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4808_cast_fp16 = layer_norm(axes = var_4808_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_399_cast_fp16)[name = string("op_4808_cast_fp16")]; + tensor var_4819_to_fp16 = const()[name = string("op_4819_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757290368)))]; + tensor var_4820_to_fp16 = const()[name = string("op_4820_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759387584)))]; + tensor linear_176_cast_fp16 = linear(bias = var_4820_to_fp16, weight = var_4819_to_fp16, x = var_4808_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor var_4823_to_fp16 = const()[name = string("op_4823_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(759389696)))]; + tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_4823_to_fp16, x = var_4808_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor var_4827_to_fp16 = const()[name = string("op_4827_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(761486912)))]; + tensor var_4828_to_fp16 = const()[name = string("op_4828_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763584128)))]; + tensor linear_178_cast_fp16 = linear(bias = var_4828_to_fp16, weight = var_4827_to_fp16, x = var_4808_cast_fp16)[name = string("linear_178_cast_fp16")]; + tensor var_4830_shape_cast_fp16 = shape(x = linear_176_cast_fp16)[name = string("op_4830_shape_cast_fp16")]; + int32 gather_266_axis_0 = const()[name = string("gather_266_axis_0"), val = int32(0)]; + int32 gather_266_batch_dims_0 = const()[name = string("gather_266_batch_dims_0"), val = int32(0)]; + bool gather_266_validate_indices_0 = const()[name = string("gather_266_validate_indices_0"), val = bool(false)]; + string var_4830_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4830_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_266_to_uint16 = const()[name = string("select_266_to_uint16"), val = uint16(1)]; + tensor var_4830_shape_cast_fp16_to_uint16 = cast(dtype = var_4830_shape_cast_fp16_to_uint16_dtype_0, x = var_4830_shape_cast_fp16)[name = string("cast_250")]; + uint16 gather_266_cast_uint16 = gather(axis = gather_266_axis_0, batch_dims = gather_266_batch_dims_0, indices = select_266_to_uint16, validate_indices = gather_266_validate_indices_0, x = var_4830_shape_cast_fp16_to_uint16)[name = string("gather_266_cast_uint16")]; + string gather_266_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_266_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_266_cast_uint16_to_int32 = cast(dtype = gather_266_cast_uint16_to_int32_dtype_0, x = gather_266_cast_uint16)[name = string("cast_249")]; + int32 end_step_47 = add(x = offset, y = gather_266_cast_uint16_to_int32)[name = string("end_step_47")]; + tensor expand_dims_352 = const()[name = string("expand_dims_352"), val = tensor([0])]; + tensor expand_dims_354 = const()[name = string("expand_dims_354"), val = tensor([0])]; + tensor expand_dims_355_axes_0 = const()[name = string("expand_dims_355_axes_0"), val = tensor([0])]; + tensor expand_dims_355 = expand_dims(axes = expand_dims_355_axes_0, x = end_step_47)[name = string("expand_dims_355")]; + tensor concat_488_values0_0 = const()[name = string("concat_488_values0_0"), val = tensor([22])]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (concat_488_values0_0, expand_dims_352, expand_dims_1, expand_dims_354))[name = string("concat_488")]; + tensor concat_489_values0_0 = const()[name = string("concat_489_values0_0"), val = tensor([0])]; + tensor concat_489_values1_0 = const()[name = string("concat_489_values1_0"), val = tensor([0])]; + tensor concat_489_values3_0 = const()[name = string("concat_489_values3_0"), val = tensor([0])]; + int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; + bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; + tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (concat_489_values0_0, concat_489_values1_0, expand_dims_355, concat_489_values3_0))[name = string("concat_489")]; + tensor k_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = k_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = k_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_23_stride_0, update = linear_177_cast_fp16, x = coreml_update_state_90)[name = string("k_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_23_cast_fp16, input = k_cache1)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = k_cache1)[name = string("coreml_update_state_92")]; + tensor v_cache1_internal_tensor_assign_23_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_23_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_488, begin_mask = v_cache1_internal_tensor_assign_23_begin_mask_0, end = concat_489, end_mask = v_cache1_internal_tensor_assign_23_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_23_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_23_stride_0, update = linear_178_cast_fp16, x = coreml_update_state_91)[name = string("v_cache1_internal_tensor_assign_23_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_23_cast_fp16, input = v_cache1)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = v_cache1)[name = string("coreml_update_state_93")]; + int32 concat_494_values0_0 = const()[name = string("concat_494_values0_0"), val = int32(1)]; + int32 concat_494_values2_0 = const()[name = string("concat_494_values2_0"), val = int32(1024)]; + int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; + bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; + tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (concat_494_values0_0, end_step_47, concat_494_values2_0))[name = string("concat_494")]; + tensor var_4846_begin_0 = const()[name = string("op_4846_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4846_end_mask_0 = const()[name = string("op_4846_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4846_cast_fp16 = slice_by_index(begin = var_4846_begin_0, end = concat_494, end_mask = var_4846_end_mask_0, x = k_cache_89_cast_fp16)[name = string("op_4846_cast_fp16")]; + tensor var_4849_begin_0 = const()[name = string("op_4849_begin_0"), val = tensor([0, 0, 0])]; + tensor var_4849_end_mask_0 = const()[name = string("op_4849_end_mask_0"), val = tensor([true, false, true])]; + tensor var_4849_cast_fp16 = slice_by_index(begin = var_4849_begin_0, end = concat_494, end_mask = var_4849_end_mask_0, x = v_cache_89_cast_fp16)[name = string("op_4849_cast_fp16")]; + tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 16, 64])]; + tensor var_4859_cast_fp16 = reshape(shape = concat_496x, x = linear_176_cast_fp16)[name = string("op_4859_cast_fp16")]; + tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_179_cast_fp16 = mul(x = var_4859_cast_fp16, y = const_208_to_fp16)[name = string("q_179_cast_fp16")]; + tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 16, 64])]; + tensor var_4866_cast_fp16 = reshape(shape = concat_497x, x = var_4846_cast_fp16)[name = string("op_4866_cast_fp16")]; + tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_225_cast_fp16 = mul(x = var_4866_cast_fp16, y = const_209_to_fp16)[name = string("k_225_cast_fp16")]; + tensor concat_498x = const()[name = string("concat_498x"), val = tensor([1, -1, 16, 64])]; + tensor var_4873_cast_fp16 = reshape(shape = concat_498x, x = var_4849_cast_fp16)[name = string("op_4873_cast_fp16")]; + tensor var_4874 = const()[name = string("op_4874"), val = tensor([0, 2, 1, 3])]; + bool qk_133_transpose_x_0 = const()[name = string("qk_133_transpose_x_0"), val = bool(false)]; + bool qk_133_transpose_y_0 = const()[name = string("qk_133_transpose_y_0"), val = bool(false)]; + tensor transpose_281_perm_0 = const()[name = string("transpose_281_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_282_perm_0 = const()[name = string("transpose_282_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_282 = transpose(perm = transpose_282_perm_0, x = k_225_cast_fp16)[name = string("transpose_302")]; + tensor transpose_281 = transpose(perm = transpose_281_perm_0, x = q_179_cast_fp16)[name = string("transpose_303")]; + tensor qk_133_cast_fp16 = matmul(transpose_x = qk_133_transpose_x_0, transpose_y = qk_133_transpose_y_0, x = transpose_281, y = transpose_282)[name = string("qk_133_cast_fp16")]; + int32 concat_499_values1_0 = const()[name = string("concat_499_values1_0"), val = int32(448)]; + int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; + bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; + tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (gather_266_cast_uint16_to_int32, concat_499_values1_0))[name = string("concat_499")]; + tensor var_4877_begin_0 = const()[name = string("op_4877_begin_0"), val = tensor([0, 0])]; + tensor var_4877_end_mask_0 = const()[name = string("op_4877_end_mask_0"), val = tensor([false, true])]; + tensor var_4877_cast_fp16 = slice_by_index(begin = var_4877_begin_0, end = concat_499, end_mask = var_4877_end_mask_0, x = mask_to_fp16)[name = string("op_4877_cast_fp16")]; + int32 concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = int32(0)]; + int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; + bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; + tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, gather_266_cast_uint16_to_int32))[name = string("concat_500")]; + tensor var_4878_begin_0 = const()[name = string("op_4878_begin_0"), val = tensor([0, 0])]; + tensor var_4878_end_mask_0 = const()[name = string("op_4878_end_mask_0"), val = tensor([true, false])]; + tensor var_4878_cast_fp16 = slice_by_index(begin = var_4878_begin_0, end = concat_500, end_mask = var_4878_end_mask_0, x = var_4877_cast_fp16)[name = string("op_4878_cast_fp16")]; + tensor qk_135_cast_fp16 = add(x = qk_133_cast_fp16, y = var_4878_cast_fp16)[name = string("qk_135_cast_fp16")]; + tensor var_4881_cast_fp16 = softmax(axis = var_4790, x = qk_135_cast_fp16)[name = string("op_4881_cast_fp16")]; + bool var_4883_transpose_x_0 = const()[name = string("op_4883_transpose_x_0"), val = bool(false)]; + bool var_4883_transpose_y_0 = const()[name = string("op_4883_transpose_y_0"), val = bool(false)]; + tensor v_225_cast_fp16 = transpose(perm = var_4874, x = var_4873_cast_fp16)[name = string("transpose_304")]; + tensor var_4883_cast_fp16 = matmul(transpose_x = var_4883_transpose_x_0, transpose_y = var_4883_transpose_y_0, x = var_4881_cast_fp16, y = v_225_cast_fp16)[name = string("op_4883_cast_fp16")]; + tensor var_4884 = const()[name = string("op_4884"), val = tensor([0, 2, 1, 3])]; + tensor concat_501x = const()[name = string("concat_501x"), val = tensor([1, -1, 1024])]; + tensor var_4885_cast_fp16 = transpose(perm = var_4884, x = var_4883_cast_fp16)[name = string("transpose_301")]; + tensor x_403_cast_fp16 = reshape(shape = concat_501x, x = var_4885_cast_fp16)[name = string("x_403_cast_fp16")]; + tensor var_4889_to_fp16 = const()[name = string("op_4889_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(763586240)))]; + tensor var_4890_to_fp16 = const()[name = string("op_4890_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765683456)))]; + tensor linear_179_cast_fp16 = linear(bias = var_4890_to_fp16, weight = var_4889_to_fp16, x = x_403_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor x_405_cast_fp16 = add(x = x_399_cast_fp16, y = linear_179_cast_fp16)[name = string("x_405_cast_fp16")]; + tensor var_4897_axes_0 = const()[name = string("op_4897_axes_0"), val = tensor([-1])]; + tensor blocks_22_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765685568)))]; + tensor blocks_22_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765687680)))]; + tensor var_4897_cast_fp16 = layer_norm(axes = var_4897_axes_0, beta = blocks_22_cross_attn_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_cross_attn_ln_weight_to_fp16, x = x_405_cast_fp16)[name = string("op_4897_cast_fp16")]; + tensor var_4906_to_fp16 = const()[name = string("op_4906_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(765689792)))]; + tensor var_4907_to_fp16 = const()[name = string("op_4907_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767787008)))]; + tensor linear_180_cast_fp16 = linear(bias = var_4907_to_fp16, weight = var_4906_to_fp16, x = var_4897_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor concat_502 = const()[name = string("concat_502"), val = tensor([0, 0, 0])]; + tensor concat_503 = const()[name = string("concat_503"), val = tensor([0, 1500, 0])]; + tensor k_227_internal_tensor_assign_1_stride_0 = const()[name = string("k_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_502, begin_mask = k_227_internal_tensor_assign_1_begin_mask_0, end = concat_503, end_mask = k_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_227_internal_tensor_assign_1_squeeze_mask_0, stride = k_227_internal_tensor_assign_1_stride_0, update = k_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("k_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_504 = const()[name = string("concat_504"), val = tensor([0, 0, 0])]; + tensor concat_505 = const()[name = string("concat_505"), val = tensor([0, 1500, 0])]; + tensor v_227_internal_tensor_assign_1_stride_0 = const()[name = string("v_227_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_227_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_227_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_227_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_227_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_504, begin_mask = v_227_internal_tensor_assign_1_begin_mask_0, end = concat_505, end_mask = v_227_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_227_internal_tensor_assign_1_squeeze_mask_0, stride = v_227_internal_tensor_assign_1_stride_0, update = v_cache_91_cast_fp16, x = k_7_to_fp16)[name = string("v_227_internal_tensor_assign_1_cast_fp16")]; + tensor concat_506x = const()[name = string("concat_506x"), val = tensor([1, -1, 16, 64])]; + tensor var_4927_cast_fp16 = reshape(shape = concat_506x, x = linear_180_cast_fp16)[name = string("op_4927_cast_fp16")]; + tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_183_cast_fp16 = mul(x = var_4927_cast_fp16, y = const_210_to_fp16)[name = string("q_183_cast_fp16")]; + tensor var_4933 = const()[name = string("op_4933"), val = tensor([1, 1500, 16, -1])]; + tensor var_4934_cast_fp16 = reshape(shape = var_4933, x = k_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4934_cast_fp16")]; + tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_229_cast_fp16 = mul(x = var_4934_cast_fp16, y = const_211_to_fp16)[name = string("k_229_cast_fp16")]; + tensor var_4940 = const()[name = string("op_4940"), val = tensor([1, 1500, 16, -1])]; + tensor var_4941_cast_fp16 = reshape(shape = var_4940, x = v_227_internal_tensor_assign_1_cast_fp16)[name = string("op_4941_cast_fp16")]; + tensor var_4942 = const()[name = string("op_4942"), val = tensor([0, 2, 1, 3])]; + bool qk_137_transpose_x_0 = const()[name = string("qk_137_transpose_x_0"), val = bool(false)]; + bool qk_137_transpose_y_0 = const()[name = string("qk_137_transpose_y_0"), val = bool(false)]; + tensor transpose_283_perm_0 = const()[name = string("transpose_283_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_284_perm_0 = const()[name = string("transpose_284_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_284 = transpose(perm = transpose_284_perm_0, x = k_229_cast_fp16)[name = string("transpose_298")]; + tensor transpose_283 = transpose(perm = transpose_283_perm_0, x = q_183_cast_fp16)[name = string("transpose_299")]; + tensor qk_137_cast_fp16 = matmul(transpose_x = qk_137_transpose_x_0, transpose_y = qk_137_transpose_y_0, x = transpose_283, y = transpose_284)[name = string("qk_137_cast_fp16")]; + tensor var_4946_cast_fp16 = softmax(axis = var_4790, x = qk_137_cast_fp16)[name = string("op_4946_cast_fp16")]; + bool var_4948_transpose_x_0 = const()[name = string("op_4948_transpose_x_0"), val = bool(false)]; + bool var_4948_transpose_y_0 = const()[name = string("op_4948_transpose_y_0"), val = bool(false)]; + tensor v_229_cast_fp16 = transpose(perm = var_4942, x = var_4941_cast_fp16)[name = string("transpose_300")]; + tensor var_4948_cast_fp16 = matmul(transpose_x = var_4948_transpose_x_0, transpose_y = var_4948_transpose_y_0, x = var_4946_cast_fp16, y = v_229_cast_fp16)[name = string("op_4948_cast_fp16")]; + tensor var_4949 = const()[name = string("op_4949"), val = tensor([0, 2, 1, 3])]; + tensor concat_507x = const()[name = string("concat_507x"), val = tensor([1, -1, 1024])]; + tensor var_4950_cast_fp16 = transpose(perm = var_4949, x = var_4948_cast_fp16)[name = string("transpose_297")]; + tensor x_409_cast_fp16 = reshape(shape = concat_507x, x = var_4950_cast_fp16)[name = string("x_409_cast_fp16")]; + tensor var_4954_to_fp16 = const()[name = string("op_4954_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(767789120)))]; + tensor var_4955_to_fp16 = const()[name = string("op_4955_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769886336)))]; + tensor linear_181_cast_fp16 = linear(bias = var_4955_to_fp16, weight = var_4954_to_fp16, x = x_409_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor x_411_cast_fp16 = add(x = x_405_cast_fp16, y = linear_181_cast_fp16)[name = string("x_411_cast_fp16")]; + tensor var_4962_axes_0 = const()[name = string("op_4962_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769888448)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769890560)))]; + tensor var_4962_cast_fp16 = layer_norm(axes = var_4962_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_4796_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_411_cast_fp16)[name = string("op_4962_cast_fp16")]; + tensor var_4971_to_fp16 = const()[name = string("op_4971_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769892672)))]; + tensor var_4972_to_fp16 = const()[name = string("op_4972_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778281344)))]; + tensor linear_182_cast_fp16 = linear(bias = var_4972_to_fp16, weight = var_4971_to_fp16, x = var_4962_cast_fp16)[name = string("linear_182_cast_fp16")]; + string x_415_mode_0 = const()[name = string("x_415_mode_0"), val = string("EXACT")]; + tensor x_415_cast_fp16 = gelu(mode = x_415_mode_0, x = linear_182_cast_fp16)[name = string("x_415_cast_fp16")]; + tensor var_4977_to_fp16 = const()[name = string("op_4977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(778289600)))]; + tensor var_4978_to_fp16 = const()[name = string("op_4978_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786678272)))]; + tensor linear_183_cast_fp16 = linear(bias = var_4978_to_fp16, weight = var_4977_to_fp16, x = x_415_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor x_417_cast_fp16 = add(x = x_411_cast_fp16, y = linear_183_cast_fp16)[name = string("x_417_cast_fp16")]; + tensor k_cache_93_begin_0 = const()[name = string("k_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_93_end_0 = const()[name = string("k_cache_93_end_0"), val = tensor([24, 1, 448, 1024])]; + tensor k_cache_93_end_mask_0 = const()[name = string("k_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_93_squeeze_mask_0 = const()[name = string("k_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_93_cast_fp16 = slice_by_index(begin = k_cache_93_begin_0, end = k_cache_93_end_0, end_mask = k_cache_93_end_mask_0, squeeze_mask = k_cache_93_squeeze_mask_0, x = coreml_update_state_92)[name = string("k_cache_93_cast_fp16")]; + tensor v_cache_93_begin_0 = const()[name = string("v_cache_93_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_93_end_0 = const()[name = string("v_cache_93_end_0"), val = tensor([24, 1, 448, 1024])]; + tensor v_cache_93_end_mask_0 = const()[name = string("v_cache_93_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_93_squeeze_mask_0 = const()[name = string("v_cache_93_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_93_cast_fp16 = slice_by_index(begin = v_cache_93_begin_0, end = v_cache_93_end_0, end_mask = v_cache_93_end_mask_0, squeeze_mask = v_cache_93_squeeze_mask_0, x = coreml_update_state_93)[name = string("v_cache_93_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([24, 1, 1500, 1024])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([24, 1, 1500, 1024])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_5001 = const()[name = string("op_5001"), val = int32(-1)]; + tensor var_5019_axes_0 = const()[name = string("op_5019_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786680384)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786682496)))]; + fp16 var_5007_to_fp16 = const()[name = string("op_5007_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5019_cast_fp16 = layer_norm(axes = var_5019_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_417_cast_fp16)[name = string("op_5019_cast_fp16")]; + tensor var_5030_to_fp16 = const()[name = string("op_5030_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(786684608)))]; + tensor var_5031_to_fp16 = const()[name = string("op_5031_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788781824)))]; + tensor linear_184_cast_fp16 = linear(bias = var_5031_to_fp16, weight = var_5030_to_fp16, x = var_5019_cast_fp16)[name = string("linear_184_cast_fp16")]; + tensor var_5034_to_fp16 = const()[name = string("op_5034_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788783936)))]; + tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_5034_to_fp16, x = var_5019_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor var_5038_to_fp16 = const()[name = string("op_5038_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(790881152)))]; + tensor var_5039_to_fp16 = const()[name = string("op_5039_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792978368)))]; + tensor linear_186_cast_fp16 = linear(bias = var_5039_to_fp16, weight = var_5038_to_fp16, x = var_5019_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_5041_shape_cast_fp16 = shape(x = linear_184_cast_fp16)[name = string("op_5041_shape_cast_fp16")]; + int32 gather_278_axis_0 = const()[name = string("gather_278_axis_0"), val = int32(0)]; + int32 gather_278_batch_dims_0 = const()[name = string("gather_278_batch_dims_0"), val = int32(0)]; + bool gather_278_validate_indices_0 = const()[name = string("gather_278_validate_indices_0"), val = bool(false)]; + string var_5041_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5041_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_278_to_uint16 = const()[name = string("select_278_to_uint16"), val = uint16(1)]; + tensor var_5041_shape_cast_fp16_to_uint16 = cast(dtype = var_5041_shape_cast_fp16_to_uint16_dtype_0, x = var_5041_shape_cast_fp16)[name = string("cast_248")]; + uint16 gather_278_cast_uint16 = gather(axis = gather_278_axis_0, batch_dims = gather_278_batch_dims_0, indices = select_278_to_uint16, validate_indices = gather_278_validate_indices_0, x = var_5041_shape_cast_fp16_to_uint16)[name = string("gather_278_cast_uint16")]; + string gather_278_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_278_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_278_cast_uint16_to_int32 = cast(dtype = gather_278_cast_uint16_to_int32_dtype_0, x = gather_278_cast_uint16)[name = string("cast_247")]; + int32 end_step = add(x = offset, y = gather_278_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_368 = const()[name = string("expand_dims_368"), val = tensor([0])]; + tensor expand_dims_370 = const()[name = string("expand_dims_370"), val = tensor([0])]; + tensor expand_dims_371_axes_0 = const()[name = string("expand_dims_371_axes_0"), val = tensor([0])]; + tensor expand_dims_371 = expand_dims(axes = expand_dims_371_axes_0, x = end_step)[name = string("expand_dims_371")]; + tensor concat_510_values0_0 = const()[name = string("concat_510_values0_0"), val = tensor([23])]; + int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; + bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; + tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (concat_510_values0_0, expand_dims_368, expand_dims_1, expand_dims_370))[name = string("concat_510")]; + tensor concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = tensor([0])]; + tensor concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = tensor([0])]; + tensor concat_511_values3_0 = const()[name = string("concat_511_values3_0"), val = tensor([0])]; + int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; + bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; + tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, expand_dims_371, concat_511_values3_0))[name = string("concat_511")]; + tensor k_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = k_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = k_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_24_stride_0, update = linear_185_cast_fp16, x = coreml_update_state_92)[name = string("k_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_24_cast_fp16, input = k_cache1)[name = string("coreml_update_state_94_write_state")]; + tensor v_cache1_internal_tensor_assign_24_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_24_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_510, begin_mask = v_cache1_internal_tensor_assign_24_begin_mask_0, end = concat_511, end_mask = v_cache1_internal_tensor_assign_24_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_24_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_24_stride_0, update = linear_186_cast_fp16, x = coreml_update_state_93)[name = string("v_cache1_internal_tensor_assign_24_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_24_cast_fp16, input = v_cache1)[name = string("coreml_update_state_95_write_state")]; + int32 concat_516_values0_0 = const()[name = string("concat_516_values0_0"), val = int32(1)]; + int32 concat_516_values2_0 = const()[name = string("concat_516_values2_0"), val = int32(1024)]; + int32 concat_516_axis_0 = const()[name = string("concat_516_axis_0"), val = int32(0)]; + bool concat_516_interleave_0 = const()[name = string("concat_516_interleave_0"), val = bool(false)]; + tensor concat_516 = concat(axis = concat_516_axis_0, interleave = concat_516_interleave_0, values = (concat_516_values0_0, end_step, concat_516_values2_0))[name = string("concat_516")]; + tensor var_5057_begin_0 = const()[name = string("op_5057_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5057_end_mask_0 = const()[name = string("op_5057_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5057_cast_fp16 = slice_by_index(begin = var_5057_begin_0, end = concat_516, end_mask = var_5057_end_mask_0, x = k_cache_93_cast_fp16)[name = string("op_5057_cast_fp16")]; + tensor var_5060_begin_0 = const()[name = string("op_5060_begin_0"), val = tensor([0, 0, 0])]; + tensor var_5060_end_mask_0 = const()[name = string("op_5060_end_mask_0"), val = tensor([true, false, true])]; + tensor var_5060_cast_fp16 = slice_by_index(begin = var_5060_begin_0, end = concat_516, end_mask = var_5060_end_mask_0, x = v_cache_93_cast_fp16)[name = string("op_5060_cast_fp16")]; + tensor concat_518x = const()[name = string("concat_518x"), val = tensor([1, -1, 16, 64])]; + tensor var_5070_cast_fp16 = reshape(shape = concat_518x, x = linear_184_cast_fp16)[name = string("op_5070_cast_fp16")]; + tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_187_cast_fp16 = mul(x = var_5070_cast_fp16, y = const_212_to_fp16)[name = string("q_187_cast_fp16")]; + tensor concat_519x = const()[name = string("concat_519x"), val = tensor([1, -1, 16, 64])]; + tensor var_5077_cast_fp16 = reshape(shape = concat_519x, x = var_5057_cast_fp16)[name = string("op_5077_cast_fp16")]; + tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_235_cast_fp16 = mul(x = var_5077_cast_fp16, y = const_213_to_fp16)[name = string("k_235_cast_fp16")]; + tensor concat_520x = const()[name = string("concat_520x"), val = tensor([1, -1, 16, 64])]; + tensor var_5084_cast_fp16 = reshape(shape = concat_520x, x = var_5060_cast_fp16)[name = string("op_5084_cast_fp16")]; + tensor var_5085 = const()[name = string("op_5085"), val = tensor([0, 2, 1, 3])]; + bool qk_139_transpose_x_0 = const()[name = string("qk_139_transpose_x_0"), val = bool(false)]; + bool qk_139_transpose_y_0 = const()[name = string("qk_139_transpose_y_0"), val = bool(false)]; + tensor transpose_285_perm_0 = const()[name = string("transpose_285_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_286_perm_0 = const()[name = string("transpose_286_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_286 = transpose(perm = transpose_286_perm_0, x = k_235_cast_fp16)[name = string("transpose_294")]; + tensor transpose_285 = transpose(perm = transpose_285_perm_0, x = q_187_cast_fp16)[name = string("transpose_295")]; + tensor qk_139_cast_fp16 = matmul(transpose_x = qk_139_transpose_x_0, transpose_y = qk_139_transpose_y_0, x = transpose_285, y = transpose_286)[name = string("qk_139_cast_fp16")]; + int32 concat_521_values1_0 = const()[name = string("concat_521_values1_0"), val = int32(448)]; + int32 concat_521_axis_0 = const()[name = string("concat_521_axis_0"), val = int32(0)]; + bool concat_521_interleave_0 = const()[name = string("concat_521_interleave_0"), val = bool(false)]; + tensor concat_521 = concat(axis = concat_521_axis_0, interleave = concat_521_interleave_0, values = (gather_278_cast_uint16_to_int32, concat_521_values1_0))[name = string("concat_521")]; + tensor var_5088_begin_0 = const()[name = string("op_5088_begin_0"), val = tensor([0, 0])]; + tensor var_5088_end_mask_0 = const()[name = string("op_5088_end_mask_0"), val = tensor([false, true])]; + tensor var_5088_cast_fp16 = slice_by_index(begin = var_5088_begin_0, end = concat_521, end_mask = var_5088_end_mask_0, x = mask_to_fp16)[name = string("op_5088_cast_fp16")]; + int32 concat_522_values0_0 = const()[name = string("concat_522_values0_0"), val = int32(0)]; + int32 concat_522_axis_0 = const()[name = string("concat_522_axis_0"), val = int32(0)]; + bool concat_522_interleave_0 = const()[name = string("concat_522_interleave_0"), val = bool(false)]; + tensor concat_522 = concat(axis = concat_522_axis_0, interleave = concat_522_interleave_0, values = (concat_522_values0_0, gather_278_cast_uint16_to_int32))[name = string("concat_522")]; + tensor var_5089_begin_0 = const()[name = string("op_5089_begin_0"), val = tensor([0, 0])]; + tensor var_5089_end_mask_0 = const()[name = string("op_5089_end_mask_0"), val = tensor([true, false])]; + tensor var_5089_cast_fp16 = slice_by_index(begin = var_5089_begin_0, end = concat_522, end_mask = var_5089_end_mask_0, x = var_5088_cast_fp16)[name = string("op_5089_cast_fp16")]; + tensor qk_141_cast_fp16 = add(x = qk_139_cast_fp16, y = var_5089_cast_fp16)[name = string("qk_141_cast_fp16")]; + tensor var_5092_cast_fp16 = softmax(axis = var_5001, x = qk_141_cast_fp16)[name = string("op_5092_cast_fp16")]; + bool var_5094_transpose_x_0 = const()[name = string("op_5094_transpose_x_0"), val = bool(false)]; + bool var_5094_transpose_y_0 = const()[name = string("op_5094_transpose_y_0"), val = bool(false)]; + tensor v_235_cast_fp16 = transpose(perm = var_5085, x = var_5084_cast_fp16)[name = string("transpose_296")]; + tensor var_5094_cast_fp16 = matmul(transpose_x = var_5094_transpose_x_0, transpose_y = var_5094_transpose_y_0, x = var_5092_cast_fp16, y = v_235_cast_fp16)[name = string("op_5094_cast_fp16")]; + tensor var_5095 = const()[name = string("op_5095"), val = tensor([0, 2, 1, 3])]; + tensor concat_523x = const()[name = string("concat_523x"), val = tensor([1, -1, 1024])]; + tensor var_5096_cast_fp16 = transpose(perm = var_5095, x = var_5094_cast_fp16)[name = string("transpose_293")]; + tensor x_421_cast_fp16 = reshape(shape = concat_523x, x = var_5096_cast_fp16)[name = string("x_421_cast_fp16")]; + tensor var_5100_to_fp16 = const()[name = string("op_5100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(792980480)))]; + tensor var_5101_to_fp16 = const()[name = string("op_5101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795077696)))]; + tensor linear_187_cast_fp16 = linear(bias = var_5101_to_fp16, weight = var_5100_to_fp16, x = x_421_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor x_423_cast_fp16 = add(x = x_417_cast_fp16, y = linear_187_cast_fp16)[name = string("x_423_cast_fp16")]; + tensor var_5108_axes_0 = const()[name = string("op_5108_axes_0"), val = tensor([-1])]; + tensor blocks_23_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795079808)))]; + tensor blocks_23_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795081920)))]; + tensor var_5108_cast_fp16 = layer_norm(axes = var_5108_axes_0, beta = blocks_23_cross_attn_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_cross_attn_ln_weight_to_fp16, x = x_423_cast_fp16)[name = string("op_5108_cast_fp16")]; + tensor var_5117_to_fp16 = const()[name = string("op_5117_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795084032)))]; + tensor var_5118_to_fp16 = const()[name = string("op_5118_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797181248)))]; + tensor linear_188_cast_fp16 = linear(bias = var_5118_to_fp16, weight = var_5117_to_fp16, x = var_5108_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor concat_524 = const()[name = string("concat_524"), val = tensor([0, 0, 0])]; + tensor concat_525 = const()[name = string("concat_525"), val = tensor([0, 1500, 0])]; + tensor k_237_internal_tensor_assign_1_stride_0 = const()[name = string("k_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_524, begin_mask = k_237_internal_tensor_assign_1_begin_mask_0, end = concat_525, end_mask = k_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_237_internal_tensor_assign_1_squeeze_mask_0, stride = k_237_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_526 = const()[name = string("concat_526"), val = tensor([0, 0, 0])]; + tensor concat_527 = const()[name = string("concat_527"), val = tensor([0, 1500, 0])]; + tensor v_237_internal_tensor_assign_1_stride_0 = const()[name = string("v_237_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_237_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_237_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_237_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_237_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_526, begin_mask = v_237_internal_tensor_assign_1_begin_mask_0, end = concat_527, end_mask = v_237_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_237_internal_tensor_assign_1_squeeze_mask_0, stride = v_237_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_237_internal_tensor_assign_1_cast_fp16")]; + tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, -1, 16, 64])]; + tensor var_5138_cast_fp16 = reshape(shape = concat_528x, x = linear_188_cast_fp16)[name = string("op_5138_cast_fp16")]; + tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_5138_cast_fp16, y = const_214_to_fp16)[name = string("q_cast_fp16")]; + tensor var_5144 = const()[name = string("op_5144"), val = tensor([1, 1500, 16, -1])]; + tensor var_5145_cast_fp16 = reshape(shape = var_5144, x = k_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5145_cast_fp16")]; + tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_5145_cast_fp16, y = const_215_to_fp16)[name = string("k_cast_fp16")]; + tensor var_5151 = const()[name = string("op_5151"), val = tensor([1, 1500, 16, -1])]; + tensor var_5152_cast_fp16 = reshape(shape = var_5151, x = v_237_internal_tensor_assign_1_cast_fp16)[name = string("op_5152_cast_fp16")]; + tensor var_5153 = const()[name = string("op_5153"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_287_perm_0 = const()[name = string("transpose_287_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_288_perm_0 = const()[name = string("transpose_288_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_288 = transpose(perm = transpose_288_perm_0, x = k_cast_fp16)[name = string("transpose_290")]; + tensor transpose_287 = transpose(perm = transpose_287_perm_0, x = q_cast_fp16)[name = string("transpose_291")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_287, y = transpose_288)[name = string("qk_cast_fp16")]; + tensor var_5157_cast_fp16 = softmax(axis = var_5001, x = qk_cast_fp16)[name = string("op_5157_cast_fp16")]; + bool var_5159_transpose_x_0 = const()[name = string("op_5159_transpose_x_0"), val = bool(false)]; + bool var_5159_transpose_y_0 = const()[name = string("op_5159_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_5153, x = var_5152_cast_fp16)[name = string("transpose_292")]; + tensor var_5159_cast_fp16 = matmul(transpose_x = var_5159_transpose_x_0, transpose_y = var_5159_transpose_y_0, x = var_5157_cast_fp16, y = v_cast_fp16)[name = string("op_5159_cast_fp16")]; + tensor var_5160 = const()[name = string("op_5160"), val = tensor([0, 2, 1, 3])]; + tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, -1, 1024])]; + tensor var_5161_cast_fp16 = transpose(perm = var_5160, x = var_5159_cast_fp16)[name = string("transpose_289")]; + tensor x_427_cast_fp16 = reshape(shape = concat_529x, x = var_5161_cast_fp16)[name = string("x_427_cast_fp16")]; + tensor var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797183360)))]; + tensor var_5166_to_fp16 = const()[name = string("op_5166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799280576)))]; + tensor linear_189_cast_fp16 = linear(bias = var_5166_to_fp16, weight = var_5165_to_fp16, x = x_427_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor x_429_cast_fp16 = add(x = x_423_cast_fp16, y = linear_189_cast_fp16)[name = string("x_429_cast_fp16")]; + tensor var_5173_axes_0 = const()[name = string("op_5173_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799282688)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799284800)))]; + tensor var_5173_cast_fp16 = layer_norm(axes = var_5173_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_5007_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_429_cast_fp16)[name = string("op_5173_cast_fp16")]; + tensor var_5182_to_fp16 = const()[name = string("op_5182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(799286912)))]; + tensor var_5183_to_fp16 = const()[name = string("op_5183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807675584)))]; + tensor linear_190_cast_fp16 = linear(bias = var_5183_to_fp16, weight = var_5182_to_fp16, x = var_5173_cast_fp16)[name = string("linear_190_cast_fp16")]; + string x_433_mode_0 = const()[name = string("x_433_mode_0"), val = string("EXACT")]; + tensor x_433_cast_fp16 = gelu(mode = x_433_mode_0, x = linear_190_cast_fp16)[name = string("x_433_cast_fp16")]; + tensor var_5188_to_fp16 = const()[name = string("op_5188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807683840)))]; + tensor var_5189_to_fp16 = const()[name = string("op_5189_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816072512)))]; + tensor linear_191_cast_fp16 = linear(bias = var_5189_to_fp16, weight = var_5188_to_fp16, x = x_433_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor x_435_cast_fp16 = add(x = x_429_cast_fp16, y = linear_191_cast_fp16)[name = string("x_435_cast_fp16")]; + tensor var_5202_axes_0 = const()[name = string("op_5202_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816074624)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816076736)))]; + fp16 var_5193_to_fp16 = const()[name = string("op_5193_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5202_cast_fp16 = layer_norm(axes = var_5202_axes_0, beta = ln_bias_to_fp16, epsilon = var_5193_to_fp16, gamma = ln_weight_to_fp16, x = x_435_cast_fp16)[name = string("op_5202_cast_fp16")]; + tensor var_5212_bias_0_to_fp16 = const()[name = string("op_5212_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816078848)))]; + tensor logits = linear(bias = var_5212_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_5202_cast_fp16)[name = string("op_5212_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/medium/decoder_second.mlmodelc/weights/weight.bin b/medium/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..fba75e31b9ef54e62e5968cda2fb1ab402230dc4 --- /dev/null +++ b/medium/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b39ed5d8c3a6ea265389ae0514446dd9fd1a2e2d1fa05ca312ba7f5c191c919 +size 816182642 diff --git a/medium/encoder.mlmodelc/analytics/coremldata.bin b/medium/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae03493199bfac3df6651194ca75cd8949716035 --- /dev/null +++ b/medium/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e46013df49c631abb62ecdde56db1b2578bef5f436747d44f5ae8e1c7ebcfdb +size 243 diff --git a/medium/encoder.mlmodelc/coremldata.bin b/medium/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a064fdd37da14988fa5735f312090bd26b3790e --- /dev/null +++ b/medium/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392253a90aa1cec92d5a6840d45c13ddeea2838456ad13320ac360d2bf0ca4d7 +size 318 diff --git a/medium/encoder.mlmodelc/metadata.json b/medium/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d22ccf5ba9572014832497aa51efd010de020d43 --- /dev/null +++ b/medium/encoder.mlmodelc/metadata.json @@ -0,0 +1,69 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 1024)", + "shortDescription" : "", + "shape" : "[1, 1500, 1024]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.mul" : 48, + "Ios18.softmax" : 24, + "Ios18.linear" : 144, + "Ios18.gelu" : 26, + "Ios18.layerNorm" : 49, + "Ios18.transpose" : 97, + "Ios18.matmul" : 48, + "Ios18.conv" : 2, + "Ios18.add" : 49, + "Ios18.reshape" : 96 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "encoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/medium/encoder.mlmodelc/model.mil b/medium/encoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..759bcad72cf1912b69db25ce5506b969631ff645 --- /dev/null +++ b/medium/encoder.mlmodelc/model.mil @@ -0,0 +1,1428 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor logmel_data) { + string var_68_pad_type_0 = const()[name = string("op_68_pad_type_0"), val = string("custom")]; + tensor var_68_pad_0 = const()[name = string("op_68_pad_0"), val = tensor([1, 1])]; + tensor var_68_strides_0 = const()[name = string("op_68_strides_0"), val = tensor([1])]; + tensor var_68_dilations_0 = const()[name = string("op_68_dilations_0"), val = tensor([1])]; + int32 var_68_groups_0 = const()[name = string("op_68_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(491648)))]; + tensor var_68_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_68_dilations_0, groups = var_68_groups_0, pad = var_68_pad_0, pad_type = var_68_pad_type_0, strides = var_68_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_68_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_68_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_86_pad_type_0 = const()[name = string("op_86_pad_type_0"), val = string("custom")]; + tensor var_86_pad_0 = const()[name = string("op_86_pad_0"), val = tensor([1, 1])]; + tensor var_86_strides_0 = const()[name = string("op_86_strides_0"), val = tensor([2])]; + tensor var_86_dilations_0 = const()[name = string("op_86_dilations_0"), val = tensor([1])]; + int32 var_86_groups_0 = const()[name = string("op_86_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493760)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6785280)))]; + tensor var_86_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_86_dilations_0, groups = var_86_groups_0, pad = var_86_pad_0, pad_type = var_86_pad_type_0, strides = var_86_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_86_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_86_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_92 = const()[name = string("op_92"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6787392)))]; + tensor x_5_cast_fp16 = transpose(perm = var_92, x = x_3_cast_fp16)[name = string("transpose_240")]; + tensor var_95_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_95_cast_fp16")]; + int32 var_108 = const()[name = string("op_108"), val = int32(-1)]; + tensor var_124_axes_0 = const()[name = string("op_124_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9859456)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9861568)))]; + fp16 var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_124_cast_fp16 = layer_norm(axes = var_124_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_114_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_95_cast_fp16)[name = string("op_124_cast_fp16")]; + tensor var_135_to_fp16 = const()[name = string("op_135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9863680)))]; + tensor var_136_to_fp16 = const()[name = string("op_136_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11960896)))]; + tensor linear_0_cast_fp16 = linear(bias = var_136_to_fp16, weight = var_135_to_fp16, x = var_124_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11963008)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14060224)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_139_to_fp16, x = var_124_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_143_to_fp16 = const()[name = string("op_143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14062336)))]; + tensor var_144_to_fp16 = const()[name = string("op_144_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16159552)))]; + tensor linear_2_cast_fp16 = linear(bias = var_144_to_fp16, weight = var_143_to_fp16, x = var_124_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_152 = const()[name = string("op_152"), val = tensor([1, 1500, 16, -1])]; + tensor var_153_cast_fp16 = reshape(shape = var_152, x = linear_0_cast_fp16)[name = string("op_153_cast_fp16")]; + tensor const_168_to_fp16 = const()[name = string("const_168_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_153_cast_fp16, y = const_168_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_159 = const()[name = string("op_159"), val = tensor([1, 1500, 16, -1])]; + tensor var_160_cast_fp16 = reshape(shape = var_159, x = linear_1_cast_fp16)[name = string("op_160_cast_fp16")]; + tensor const_169_to_fp16 = const()[name = string("const_169_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_160_cast_fp16, y = const_169_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_166 = const()[name = string("op_166"), val = tensor([1, 1500, 16, -1])]; + tensor var_167_cast_fp16 = reshape(shape = var_166, x = linear_2_cast_fp16)[name = string("op_167_cast_fp16")]; + tensor var_168 = const()[name = string("op_168"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = k_3_cast_fp16)[name = string("transpose_237")]; + tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_3_cast_fp16)[name = string("transpose_238")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_96, y = transpose_97)[name = string("qk_1_cast_fp16")]; + tensor var_172_cast_fp16 = softmax(axis = var_108, x = qk_1_cast_fp16)[name = string("op_172_cast_fp16")]; + bool var_174_transpose_x_0 = const()[name = string("op_174_transpose_x_0"), val = bool(false)]; + bool var_174_transpose_y_0 = const()[name = string("op_174_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_168, x = var_167_cast_fp16)[name = string("transpose_239")]; + tensor var_174_cast_fp16 = matmul(transpose_x = var_174_transpose_x_0, transpose_y = var_174_transpose_y_0, x = var_172_cast_fp16, y = v_3_cast_fp16)[name = string("op_174_cast_fp16")]; + tensor var_175 = const()[name = string("op_175"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 1024])]; + tensor var_176_cast_fp16 = transpose(perm = var_175, x = var_174_cast_fp16)[name = string("transpose_236")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_176_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_180_to_fp16 = const()[name = string("op_180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16161664)))]; + tensor var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18258880)))]; + tensor linear_3_cast_fp16 = linear(bias = var_181_to_fp16, weight = var_180_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_95_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_188_axes_0 = const()[name = string("op_188_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18260992)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18263104)))]; + tensor var_188_cast_fp16 = layer_norm(axes = var_188_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_114_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_188_cast_fp16")]; + tensor var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18265216)))]; + tensor var_198_to_fp16 = const()[name = string("op_198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26653888)))]; + tensor linear_4_cast_fp16 = linear(bias = var_198_to_fp16, weight = var_197_to_fp16, x = var_188_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26662144)))]; + tensor var_204_to_fp16 = const()[name = string("op_204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35050816)))]; + tensor linear_5_cast_fp16 = linear(bias = var_204_to_fp16, weight = var_203_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_214 = const()[name = string("op_214"), val = int32(-1)]; + tensor var_230_axes_0 = const()[name = string("op_230_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35052928)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35055040)))]; + fp16 var_220_to_fp16 = const()[name = string("op_220_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_230_cast_fp16 = layer_norm(axes = var_230_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_220_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_230_cast_fp16")]; + tensor var_241_to_fp16 = const()[name = string("op_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35057152)))]; + tensor var_242_to_fp16 = const()[name = string("op_242_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37154368)))]; + tensor linear_6_cast_fp16 = linear(bias = var_242_to_fp16, weight = var_241_to_fp16, x = var_230_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_245_to_fp16 = const()[name = string("op_245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37156480)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_245_to_fp16, x = var_230_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39253696)))]; + tensor var_250_to_fp16 = const()[name = string("op_250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41350912)))]; + tensor linear_8_cast_fp16 = linear(bias = var_250_to_fp16, weight = var_249_to_fp16, x = var_230_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_258 = const()[name = string("op_258"), val = tensor([1, 1500, 16, -1])]; + tensor var_259_cast_fp16 = reshape(shape = var_258, x = linear_6_cast_fp16)[name = string("op_259_cast_fp16")]; + tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_259_cast_fp16, y = const_170_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_265 = const()[name = string("op_265"), val = tensor([1, 1500, 16, -1])]; + tensor var_266_cast_fp16 = reshape(shape = var_265, x = linear_7_cast_fp16)[name = string("op_266_cast_fp16")]; + tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_266_cast_fp16, y = const_171_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_272 = const()[name = string("op_272"), val = tensor([1, 1500, 16, -1])]; + tensor var_273_cast_fp16 = reshape(shape = var_272, x = linear_8_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor var_274 = const()[name = string("op_274"), val = tensor([0, 2, 1, 3])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = k_7_cast_fp16)[name = string("transpose_233")]; + tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = q_7_cast_fp16)[name = string("transpose_234")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_98, y = transpose_99)[name = string("qk_3_cast_fp16")]; + tensor var_278_cast_fp16 = softmax(axis = var_214, x = qk_3_cast_fp16)[name = string("op_278_cast_fp16")]; + bool var_280_transpose_x_0 = const()[name = string("op_280_transpose_x_0"), val = bool(false)]; + bool var_280_transpose_y_0 = const()[name = string("op_280_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_274, x = var_273_cast_fp16)[name = string("transpose_235")]; + tensor var_280_cast_fp16 = matmul(transpose_x = var_280_transpose_x_0, transpose_y = var_280_transpose_y_0, x = var_278_cast_fp16, y = v_7_cast_fp16)[name = string("op_280_cast_fp16")]; + tensor var_281 = const()[name = string("op_281"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 1024])]; + tensor var_282_cast_fp16 = transpose(perm = var_281, x = var_280_cast_fp16)[name = string("transpose_232")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_282_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_286_to_fp16 = const()[name = string("op_286_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41353024)))]; + tensor var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43450240)))]; + tensor linear_9_cast_fp16 = linear(bias = var_287_to_fp16, weight = var_286_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_294_axes_0 = const()[name = string("op_294_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43452352)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43454464)))]; + tensor var_294_cast_fp16 = layer_norm(axes = var_294_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_220_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_294_cast_fp16")]; + tensor var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43456576)))]; + tensor var_304_to_fp16 = const()[name = string("op_304_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51845248)))]; + tensor linear_10_cast_fp16 = linear(bias = var_304_to_fp16, weight = var_303_to_fp16, x = var_294_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_309_to_fp16 = const()[name = string("op_309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51853504)))]; + tensor var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60242176)))]; + tensor linear_11_cast_fp16 = linear(bias = var_310_to_fp16, weight = var_309_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_320 = const()[name = string("op_320"), val = int32(-1)]; + tensor var_336_axes_0 = const()[name = string("op_336_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60244288)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60246400)))]; + fp16 var_326_to_fp16 = const()[name = string("op_326_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_336_cast_fp16 = layer_norm(axes = var_336_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_326_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_336_cast_fp16")]; + tensor var_347_to_fp16 = const()[name = string("op_347_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60248512)))]; + tensor var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62345728)))]; + tensor linear_12_cast_fp16 = linear(bias = var_348_to_fp16, weight = var_347_to_fp16, x = var_336_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_351_to_fp16 = const()[name = string("op_351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347840)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_351_to_fp16, x = var_336_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_355_to_fp16 = const()[name = string("op_355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64445056)))]; + tensor var_356_to_fp16 = const()[name = string("op_356_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66542272)))]; + tensor linear_14_cast_fp16 = linear(bias = var_356_to_fp16, weight = var_355_to_fp16, x = var_336_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_364 = const()[name = string("op_364"), val = tensor([1, 1500, 16, -1])]; + tensor var_365_cast_fp16 = reshape(shape = var_364, x = linear_12_cast_fp16)[name = string("op_365_cast_fp16")]; + tensor const_172_to_fp16 = const()[name = string("const_172_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_365_cast_fp16, y = const_172_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_371 = const()[name = string("op_371"), val = tensor([1, 1500, 16, -1])]; + tensor var_372_cast_fp16 = reshape(shape = var_371, x = linear_13_cast_fp16)[name = string("op_372_cast_fp16")]; + tensor const_173_to_fp16 = const()[name = string("const_173_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_372_cast_fp16, y = const_173_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_378 = const()[name = string("op_378"), val = tensor([1, 1500, 16, -1])]; + tensor var_379_cast_fp16 = reshape(shape = var_378, x = linear_14_cast_fp16)[name = string("op_379_cast_fp16")]; + tensor var_380 = const()[name = string("op_380"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = k_11_cast_fp16)[name = string("transpose_229")]; + tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_11_cast_fp16)[name = string("transpose_230")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_100, y = transpose_101)[name = string("qk_5_cast_fp16")]; + tensor var_384_cast_fp16 = softmax(axis = var_320, x = qk_5_cast_fp16)[name = string("op_384_cast_fp16")]; + bool var_386_transpose_x_0 = const()[name = string("op_386_transpose_x_0"), val = bool(false)]; + bool var_386_transpose_y_0 = const()[name = string("op_386_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_380, x = var_379_cast_fp16)[name = string("transpose_231")]; + tensor var_386_cast_fp16 = matmul(transpose_x = var_386_transpose_x_0, transpose_y = var_386_transpose_y_0, x = var_384_cast_fp16, y = v_11_cast_fp16)[name = string("op_386_cast_fp16")]; + tensor var_387 = const()[name = string("op_387"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 1024])]; + tensor var_388_cast_fp16 = transpose(perm = var_387, x = var_386_cast_fp16)[name = string("transpose_228")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_388_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66544384)))]; + tensor var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68641600)))]; + tensor linear_15_cast_fp16 = linear(bias = var_393_to_fp16, weight = var_392_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_400_axes_0 = const()[name = string("op_400_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68643712)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68645824)))]; + tensor var_400_cast_fp16 = layer_norm(axes = var_400_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_326_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_400_cast_fp16")]; + tensor var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68647936)))]; + tensor var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77036608)))]; + tensor linear_16_cast_fp16 = linear(bias = var_410_to_fp16, weight = var_409_to_fp16, x = var_400_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_415_to_fp16 = const()[name = string("op_415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77044864)))]; + tensor var_416_to_fp16 = const()[name = string("op_416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85433536)))]; + tensor linear_17_cast_fp16 = linear(bias = var_416_to_fp16, weight = var_415_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_426 = const()[name = string("op_426"), val = int32(-1)]; + tensor var_442_axes_0 = const()[name = string("op_442_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85435648)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85437760)))]; + fp16 var_432_to_fp16 = const()[name = string("op_432_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_442_cast_fp16 = layer_norm(axes = var_442_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_432_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_442_cast_fp16")]; + tensor var_453_to_fp16 = const()[name = string("op_453_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85439872)))]; + tensor var_454_to_fp16 = const()[name = string("op_454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87537088)))]; + tensor linear_18_cast_fp16 = linear(bias = var_454_to_fp16, weight = var_453_to_fp16, x = var_442_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_457_to_fp16 = const()[name = string("op_457_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87539200)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_457_to_fp16, x = var_442_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_461_to_fp16 = const()[name = string("op_461_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89636416)))]; + tensor var_462_to_fp16 = const()[name = string("op_462_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91733632)))]; + tensor linear_20_cast_fp16 = linear(bias = var_462_to_fp16, weight = var_461_to_fp16, x = var_442_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_470 = const()[name = string("op_470"), val = tensor([1, 1500, 16, -1])]; + tensor var_471_cast_fp16 = reshape(shape = var_470, x = linear_18_cast_fp16)[name = string("op_471_cast_fp16")]; + tensor const_174_to_fp16 = const()[name = string("const_174_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_471_cast_fp16, y = const_174_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_477 = const()[name = string("op_477"), val = tensor([1, 1500, 16, -1])]; + tensor var_478_cast_fp16 = reshape(shape = var_477, x = linear_19_cast_fp16)[name = string("op_478_cast_fp16")]; + tensor const_175_to_fp16 = const()[name = string("const_175_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_478_cast_fp16, y = const_175_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_484 = const()[name = string("op_484"), val = tensor([1, 1500, 16, -1])]; + tensor var_485_cast_fp16 = reshape(shape = var_484, x = linear_20_cast_fp16)[name = string("op_485_cast_fp16")]; + tensor var_486 = const()[name = string("op_486"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = k_15_cast_fp16)[name = string("transpose_225")]; + tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = q_15_cast_fp16)[name = string("transpose_226")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_102, y = transpose_103)[name = string("qk_7_cast_fp16")]; + tensor var_490_cast_fp16 = softmax(axis = var_426, x = qk_7_cast_fp16)[name = string("op_490_cast_fp16")]; + bool var_492_transpose_x_0 = const()[name = string("op_492_transpose_x_0"), val = bool(false)]; + bool var_492_transpose_y_0 = const()[name = string("op_492_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_486, x = var_485_cast_fp16)[name = string("transpose_227")]; + tensor var_492_cast_fp16 = matmul(transpose_x = var_492_transpose_x_0, transpose_y = var_492_transpose_y_0, x = var_490_cast_fp16, y = v_15_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor var_493 = const()[name = string("op_493"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 1024])]; + tensor var_494_cast_fp16 = transpose(perm = var_493, x = var_492_cast_fp16)[name = string("transpose_224")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_494_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91735744)))]; + tensor var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93832960)))]; + tensor linear_21_cast_fp16 = linear(bias = var_499_to_fp16, weight = var_498_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_506_axes_0 = const()[name = string("op_506_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93835072)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93837184)))]; + tensor var_506_cast_fp16 = layer_norm(axes = var_506_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_432_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93839296)))]; + tensor var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102227968)))]; + tensor linear_22_cast_fp16 = linear(bias = var_516_to_fp16, weight = var_515_to_fp16, x = var_506_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_521_to_fp16 = const()[name = string("op_521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102236224)))]; + tensor var_522_to_fp16 = const()[name = string("op_522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110624896)))]; + tensor linear_23_cast_fp16 = linear(bias = var_522_to_fp16, weight = var_521_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")]; + int32 var_532 = const()[name = string("op_532"), val = int32(-1)]; + tensor var_548_axes_0 = const()[name = string("op_548_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110627008)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110629120)))]; + fp16 var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_548_cast_fp16 = layer_norm(axes = var_548_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_538_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_548_cast_fp16")]; + tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110631232)))]; + tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112728448)))]; + tensor linear_24_cast_fp16 = linear(bias = var_560_to_fp16, weight = var_559_to_fp16, x = var_548_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_563_to_fp16 = const()[name = string("op_563_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112730560)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_563_to_fp16, x = var_548_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_567_to_fp16 = const()[name = string("op_567_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114827776)))]; + tensor var_568_to_fp16 = const()[name = string("op_568_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116924992)))]; + tensor linear_26_cast_fp16 = linear(bias = var_568_to_fp16, weight = var_567_to_fp16, x = var_548_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_576 = const()[name = string("op_576"), val = tensor([1, 1500, 16, -1])]; + tensor var_577_cast_fp16 = reshape(shape = var_576, x = linear_24_cast_fp16)[name = string("op_577_cast_fp16")]; + tensor const_176_to_fp16 = const()[name = string("const_176_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_577_cast_fp16, y = const_176_to_fp16)[name = string("q_19_cast_fp16")]; + tensor var_583 = const()[name = string("op_583"), val = tensor([1, 1500, 16, -1])]; + tensor var_584_cast_fp16 = reshape(shape = var_583, x = linear_25_cast_fp16)[name = string("op_584_cast_fp16")]; + tensor const_177_to_fp16 = const()[name = string("const_177_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_584_cast_fp16, y = const_177_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_590 = const()[name = string("op_590"), val = tensor([1, 1500, 16, -1])]; + tensor var_591_cast_fp16 = reshape(shape = var_590, x = linear_26_cast_fp16)[name = string("op_591_cast_fp16")]; + tensor var_592 = const()[name = string("op_592"), val = tensor([0, 2, 1, 3])]; + bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)]; + bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)]; + tensor transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = k_19_cast_fp16)[name = string("transpose_221")]; + tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = q_19_cast_fp16)[name = string("transpose_222")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_104, y = transpose_105)[name = string("qk_9_cast_fp16")]; + tensor var_596_cast_fp16 = softmax(axis = var_532, x = qk_9_cast_fp16)[name = string("op_596_cast_fp16")]; + bool var_598_transpose_x_0 = const()[name = string("op_598_transpose_x_0"), val = bool(false)]; + bool var_598_transpose_y_0 = const()[name = string("op_598_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_592, x = var_591_cast_fp16)[name = string("transpose_223")]; + tensor var_598_cast_fp16 = matmul(transpose_x = var_598_transpose_x_0, transpose_y = var_598_transpose_y_0, x = var_596_cast_fp16, y = v_19_cast_fp16)[name = string("op_598_cast_fp16")]; + tensor var_599 = const()[name = string("op_599"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 1500, 1024])]; + tensor var_600_cast_fp16 = transpose(perm = var_599, x = var_598_cast_fp16)[name = string("transpose_220")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = var_600_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116927104)))]; + tensor var_605_to_fp16 = const()[name = string("op_605_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119024320)))]; + tensor linear_27_cast_fp16 = linear(bias = var_605_to_fp16, weight = var_604_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_612_axes_0 = const()[name = string("op_612_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119026432)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119028544)))]; + tensor var_612_cast_fp16 = layer_norm(axes = var_612_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_538_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_612_cast_fp16")]; + tensor var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119030656)))]; + tensor var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127419328)))]; + tensor linear_28_cast_fp16 = linear(bias = var_622_to_fp16, weight = var_621_to_fp16, x = var_612_cast_fp16)[name = string("linear_28_cast_fp16")]; + string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = string("op_627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127427584)))]; + tensor var_628_to_fp16 = const()[name = string("op_628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135816256)))]; + tensor linear_29_cast_fp16 = linear(bias = var_628_to_fp16, weight = var_627_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")]; + int32 var_638 = const()[name = string("op_638"), val = int32(-1)]; + tensor var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135818368)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135820480)))]; + fp16 var_644_to_fp16 = const()[name = string("op_644_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_654_cast_fp16 = layer_norm(axes = var_654_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_644_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_654_cast_fp16")]; + tensor var_665_to_fp16 = const()[name = string("op_665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135822592)))]; + tensor var_666_to_fp16 = const()[name = string("op_666_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137919808)))]; + tensor linear_30_cast_fp16 = linear(bias = var_666_to_fp16, weight = var_665_to_fp16, x = var_654_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137921920)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_669_to_fp16, x = var_654_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor var_673_to_fp16 = const()[name = string("op_673_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140019136)))]; + tensor var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142116352)))]; + tensor linear_32_cast_fp16 = linear(bias = var_674_to_fp16, weight = var_673_to_fp16, x = var_654_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_682 = const()[name = string("op_682"), val = tensor([1, 1500, 16, -1])]; + tensor var_683_cast_fp16 = reshape(shape = var_682, x = linear_30_cast_fp16)[name = string("op_683_cast_fp16")]; + tensor const_178_to_fp16 = const()[name = string("const_178_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_683_cast_fp16, y = const_178_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_689 = const()[name = string("op_689"), val = tensor([1, 1500, 16, -1])]; + tensor var_690_cast_fp16 = reshape(shape = var_689, x = linear_31_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor const_179_to_fp16 = const()[name = string("const_179_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_23_cast_fp16 = mul(x = var_690_cast_fp16, y = const_179_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_696 = const()[name = string("op_696"), val = tensor([1, 1500, 16, -1])]; + tensor var_697_cast_fp16 = reshape(shape = var_696, x = linear_32_cast_fp16)[name = string("op_697_cast_fp16")]; + tensor var_698 = const()[name = string("op_698"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = k_23_cast_fp16)[name = string("transpose_217")]; + tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = q_23_cast_fp16)[name = string("transpose_218")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_106, y = transpose_107)[name = string("qk_11_cast_fp16")]; + tensor var_702_cast_fp16 = softmax(axis = var_638, x = qk_11_cast_fp16)[name = string("op_702_cast_fp16")]; + bool var_704_transpose_x_0 = const()[name = string("op_704_transpose_x_0"), val = bool(false)]; + bool var_704_transpose_y_0 = const()[name = string("op_704_transpose_y_0"), val = bool(false)]; + tensor v_23_cast_fp16 = transpose(perm = var_698, x = var_697_cast_fp16)[name = string("transpose_219")]; + tensor var_704_cast_fp16 = matmul(transpose_x = var_704_transpose_x_0, transpose_y = var_704_transpose_y_0, x = var_702_cast_fp16, y = v_23_cast_fp16)[name = string("op_704_cast_fp16")]; + tensor var_705 = const()[name = string("op_705"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([1, 1500, 1024])]; + tensor var_706_cast_fp16 = transpose(perm = var_705, x = var_704_cast_fp16)[name = string("transpose_216")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = var_706_cast_fp16)[name = string("x_71_cast_fp16")]; + tensor var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142118464)))]; + tensor var_711_to_fp16 = const()[name = string("op_711_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144215680)))]; + tensor linear_33_cast_fp16 = linear(bias = var_711_to_fp16, weight = var_710_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_718_axes_0 = const()[name = string("op_718_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144217792)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144219904)))]; + tensor var_718_cast_fp16 = layer_norm(axes = var_718_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_644_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_718_cast_fp16")]; + tensor var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144222016)))]; + tensor var_728_to_fp16 = const()[name = string("op_728_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152610688)))]; + tensor linear_34_cast_fp16 = linear(bias = var_728_to_fp16, weight = var_727_to_fp16, x = var_718_cast_fp16)[name = string("linear_34_cast_fp16")]; + string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")]; + tensor var_733_to_fp16 = const()[name = string("op_733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152618944)))]; + tensor var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161007616)))]; + tensor linear_35_cast_fp16 = linear(bias = var_734_to_fp16, weight = var_733_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_744 = const()[name = string("op_744"), val = int32(-1)]; + tensor var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161009728)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161011840)))]; + fp16 var_750_to_fp16 = const()[name = string("op_750_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_760_cast_fp16 = layer_norm(axes = var_760_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_750_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor var_771_to_fp16 = const()[name = string("op_771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161013952)))]; + tensor var_772_to_fp16 = const()[name = string("op_772_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163111168)))]; + tensor linear_36_cast_fp16 = linear(bias = var_772_to_fp16, weight = var_771_to_fp16, x = var_760_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor var_775_to_fp16 = const()[name = string("op_775_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163113280)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_775_to_fp16, x = var_760_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor var_779_to_fp16 = const()[name = string("op_779_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165210496)))]; + tensor var_780_to_fp16 = const()[name = string("op_780_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167307712)))]; + tensor linear_38_cast_fp16 = linear(bias = var_780_to_fp16, weight = var_779_to_fp16, x = var_760_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor var_788 = const()[name = string("op_788"), val = tensor([1, 1500, 16, -1])]; + tensor var_789_cast_fp16 = reshape(shape = var_788, x = linear_36_cast_fp16)[name = string("op_789_cast_fp16")]; + tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_789_cast_fp16, y = const_180_to_fp16)[name = string("q_27_cast_fp16")]; + tensor var_795 = const()[name = string("op_795"), val = tensor([1, 1500, 16, -1])]; + tensor var_796_cast_fp16 = reshape(shape = var_795, x = linear_37_cast_fp16)[name = string("op_796_cast_fp16")]; + tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_27_cast_fp16 = mul(x = var_796_cast_fp16, y = const_181_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_802 = const()[name = string("op_802"), val = tensor([1, 1500, 16, -1])]; + tensor var_803_cast_fp16 = reshape(shape = var_802, x = linear_38_cast_fp16)[name = string("op_803_cast_fp16")]; + tensor var_804 = const()[name = string("op_804"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = k_27_cast_fp16)[name = string("transpose_213")]; + tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = q_27_cast_fp16)[name = string("transpose_214")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_108, y = transpose_109)[name = string("qk_13_cast_fp16")]; + tensor var_808_cast_fp16 = softmax(axis = var_744, x = qk_13_cast_fp16)[name = string("op_808_cast_fp16")]; + bool var_810_transpose_x_0 = const()[name = string("op_810_transpose_x_0"), val = bool(false)]; + bool var_810_transpose_y_0 = const()[name = string("op_810_transpose_y_0"), val = bool(false)]; + tensor v_27_cast_fp16 = transpose(perm = var_804, x = var_803_cast_fp16)[name = string("transpose_215")]; + tensor var_810_cast_fp16 = matmul(transpose_x = var_810_transpose_x_0, transpose_y = var_810_transpose_y_0, x = var_808_cast_fp16, y = v_27_cast_fp16)[name = string("op_810_cast_fp16")]; + tensor var_811 = const()[name = string("op_811"), val = tensor([0, 2, 1, 3])]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([1, 1500, 1024])]; + tensor var_812_cast_fp16 = transpose(perm = var_811, x = var_810_cast_fp16)[name = string("transpose_212")]; + tensor x_83_cast_fp16 = reshape(shape = concat_6, x = var_812_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167309824)))]; + tensor var_817_to_fp16 = const()[name = string("op_817_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169407040)))]; + tensor linear_39_cast_fp16 = linear(bias = var_817_to_fp16, weight = var_816_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_824_axes_0 = const()[name = string("op_824_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169409152)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169411264)))]; + tensor var_824_cast_fp16 = layer_norm(axes = var_824_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_750_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_824_cast_fp16")]; + tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169413376)))]; + tensor var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177802048)))]; + tensor linear_40_cast_fp16 = linear(bias = var_834_to_fp16, weight = var_833_to_fp16, x = var_824_cast_fp16)[name = string("linear_40_cast_fp16")]; + string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")]; + tensor x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_839_to_fp16 = const()[name = string("op_839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177810304)))]; + tensor var_840_to_fp16 = const()[name = string("op_840_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186198976)))]; + tensor linear_41_cast_fp16 = linear(bias = var_840_to_fp16, weight = var_839_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")]; + int32 var_850 = const()[name = string("op_850"), val = int32(-1)]; + tensor var_866_axes_0 = const()[name = string("op_866_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186201088)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186203200)))]; + fp16 var_856_to_fp16 = const()[name = string("op_856_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_866_cast_fp16 = layer_norm(axes = var_866_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_856_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_866_cast_fp16")]; + tensor var_877_to_fp16 = const()[name = string("op_877_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186205312)))]; + tensor var_878_to_fp16 = const()[name = string("op_878_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188302528)))]; + tensor linear_42_cast_fp16 = linear(bias = var_878_to_fp16, weight = var_877_to_fp16, x = var_866_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_881_to_fp16 = const()[name = string("op_881_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188304640)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_881_to_fp16, x = var_866_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor var_885_to_fp16 = const()[name = string("op_885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190401856)))]; + tensor var_886_to_fp16 = const()[name = string("op_886_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192499072)))]; + tensor linear_44_cast_fp16 = linear(bias = var_886_to_fp16, weight = var_885_to_fp16, x = var_866_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor var_894 = const()[name = string("op_894"), val = tensor([1, 1500, 16, -1])]; + tensor var_895_cast_fp16 = reshape(shape = var_894, x = linear_42_cast_fp16)[name = string("op_895_cast_fp16")]; + tensor const_182_to_fp16 = const()[name = string("const_182_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_895_cast_fp16, y = const_182_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_901 = const()[name = string("op_901"), val = tensor([1, 1500, 16, -1])]; + tensor var_902_cast_fp16 = reshape(shape = var_901, x = linear_43_cast_fp16)[name = string("op_902_cast_fp16")]; + tensor const_183_to_fp16 = const()[name = string("const_183_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_31_cast_fp16 = mul(x = var_902_cast_fp16, y = const_183_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_908 = const()[name = string("op_908"), val = tensor([1, 1500, 16, -1])]; + tensor var_909_cast_fp16 = reshape(shape = var_908, x = linear_44_cast_fp16)[name = string("op_909_cast_fp16")]; + tensor var_910 = const()[name = string("op_910"), val = tensor([0, 2, 1, 3])]; + bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)]; + bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)]; + tensor transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = k_31_cast_fp16)[name = string("transpose_209")]; + tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = q_31_cast_fp16)[name = string("transpose_210")]; + tensor qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_110, y = transpose_111)[name = string("qk_15_cast_fp16")]; + tensor var_914_cast_fp16 = softmax(axis = var_850, x = qk_15_cast_fp16)[name = string("op_914_cast_fp16")]; + bool var_916_transpose_x_0 = const()[name = string("op_916_transpose_x_0"), val = bool(false)]; + bool var_916_transpose_y_0 = const()[name = string("op_916_transpose_y_0"), val = bool(false)]; + tensor v_31_cast_fp16 = transpose(perm = var_910, x = var_909_cast_fp16)[name = string("transpose_211")]; + tensor var_916_cast_fp16 = matmul(transpose_x = var_916_transpose_x_0, transpose_y = var_916_transpose_y_0, x = var_914_cast_fp16, y = v_31_cast_fp16)[name = string("op_916_cast_fp16")]; + tensor var_917 = const()[name = string("op_917"), val = tensor([0, 2, 1, 3])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([1, 1500, 1024])]; + tensor var_918_cast_fp16 = transpose(perm = var_917, x = var_916_cast_fp16)[name = string("transpose_208")]; + tensor x_95_cast_fp16 = reshape(shape = concat_7, x = var_918_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192501184)))]; + tensor var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194598400)))]; + tensor linear_45_cast_fp16 = linear(bias = var_923_to_fp16, weight = var_922_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_930_axes_0 = const()[name = string("op_930_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194600512)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194602624)))]; + tensor var_930_cast_fp16 = layer_norm(axes = var_930_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_856_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_930_cast_fp16")]; + tensor var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194604736)))]; + tensor var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202993408)))]; + tensor linear_46_cast_fp16 = linear(bias = var_940_to_fp16, weight = var_939_to_fp16, x = var_930_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")]; + tensor x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_945_to_fp16 = const()[name = string("op_945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203001664)))]; + tensor var_946_to_fp16 = const()[name = string("op_946_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211390336)))]; + tensor linear_47_cast_fp16 = linear(bias = var_946_to_fp16, weight = var_945_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")]; + int32 var_956 = const()[name = string("op_956"), val = int32(-1)]; + tensor var_972_axes_0 = const()[name = string("op_972_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211392448)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211394560)))]; + fp16 var_962_to_fp16 = const()[name = string("op_962_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_972_cast_fp16 = layer_norm(axes = var_972_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_962_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_972_cast_fp16")]; + tensor var_983_to_fp16 = const()[name = string("op_983_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211396672)))]; + tensor var_984_to_fp16 = const()[name = string("op_984_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213493888)))]; + tensor linear_48_cast_fp16 = linear(bias = var_984_to_fp16, weight = var_983_to_fp16, x = var_972_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_987_to_fp16 = const()[name = string("op_987_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213496000)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_987_to_fp16, x = var_972_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_991_to_fp16 = const()[name = string("op_991_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215593216)))]; + tensor var_992_to_fp16 = const()[name = string("op_992_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217690432)))]; + tensor linear_50_cast_fp16 = linear(bias = var_992_to_fp16, weight = var_991_to_fp16, x = var_972_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1000 = const()[name = string("op_1000"), val = tensor([1, 1500, 16, -1])]; + tensor var_1001_cast_fp16 = reshape(shape = var_1000, x = linear_48_cast_fp16)[name = string("op_1001_cast_fp16")]; + tensor const_184_to_fp16 = const()[name = string("const_184_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1001_cast_fp16, y = const_184_to_fp16)[name = string("q_35_cast_fp16")]; + tensor var_1007 = const()[name = string("op_1007"), val = tensor([1, 1500, 16, -1])]; + tensor var_1008_cast_fp16 = reshape(shape = var_1007, x = linear_49_cast_fp16)[name = string("op_1008_cast_fp16")]; + tensor const_185_to_fp16 = const()[name = string("const_185_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_1008_cast_fp16, y = const_185_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_1014 = const()[name = string("op_1014"), val = tensor([1, 1500, 16, -1])]; + tensor var_1015_cast_fp16 = reshape(shape = var_1014, x = linear_50_cast_fp16)[name = string("op_1015_cast_fp16")]; + tensor var_1016 = const()[name = string("op_1016"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = k_35_cast_fp16)[name = string("transpose_205")]; + tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = q_35_cast_fp16)[name = string("transpose_206")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_112, y = transpose_113)[name = string("qk_17_cast_fp16")]; + tensor var_1020_cast_fp16 = softmax(axis = var_956, x = qk_17_cast_fp16)[name = string("op_1020_cast_fp16")]; + bool var_1022_transpose_x_0 = const()[name = string("op_1022_transpose_x_0"), val = bool(false)]; + bool var_1022_transpose_y_0 = const()[name = string("op_1022_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_1016, x = var_1015_cast_fp16)[name = string("transpose_207")]; + tensor var_1022_cast_fp16 = matmul(transpose_x = var_1022_transpose_x_0, transpose_y = var_1022_transpose_y_0, x = var_1020_cast_fp16, y = v_35_cast_fp16)[name = string("op_1022_cast_fp16")]; + tensor var_1023 = const()[name = string("op_1023"), val = tensor([0, 2, 1, 3])]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 1500, 1024])]; + tensor var_1024_cast_fp16 = transpose(perm = var_1023, x = var_1022_cast_fp16)[name = string("transpose_204")]; + tensor x_107_cast_fp16 = reshape(shape = concat_8, x = var_1024_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor var_1028_to_fp16 = const()[name = string("op_1028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217692544)))]; + tensor var_1029_to_fp16 = const()[name = string("op_1029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219789760)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1029_to_fp16, weight = var_1028_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1036_axes_0 = const()[name = string("op_1036_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219791872)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219793984)))]; + tensor var_1036_cast_fp16 = layer_norm(axes = var_1036_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_962_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1036_cast_fp16")]; + tensor var_1045_to_fp16 = const()[name = string("op_1045_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219796096)))]; + tensor var_1046_to_fp16 = const()[name = string("op_1046_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228184768)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1046_to_fp16, weight = var_1045_to_fp16, x = var_1036_cast_fp16)[name = string("linear_52_cast_fp16")]; + string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")]; + tensor x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")]; + tensor var_1051_to_fp16 = const()[name = string("op_1051_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228193024)))]; + tensor var_1052_to_fp16 = const()[name = string("op_1052_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236581696)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1052_to_fp16, weight = var_1051_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")]; + int32 var_1062 = const()[name = string("op_1062"), val = int32(-1)]; + tensor var_1078_axes_0 = const()[name = string("op_1078_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236583808)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236585920)))]; + fp16 var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1078_cast_fp16 = layer_norm(axes = var_1078_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1068_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1078_cast_fp16")]; + tensor var_1089_to_fp16 = const()[name = string("op_1089_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236588032)))]; + tensor var_1090_to_fp16 = const()[name = string("op_1090_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238685248)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1090_to_fp16, weight = var_1089_to_fp16, x = var_1078_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor var_1093_to_fp16 = const()[name = string("op_1093_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238687360)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1093_to_fp16, x = var_1078_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor var_1097_to_fp16 = const()[name = string("op_1097_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240784576)))]; + tensor var_1098_to_fp16 = const()[name = string("op_1098_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242881792)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1098_to_fp16, weight = var_1097_to_fp16, x = var_1078_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1106 = const()[name = string("op_1106"), val = tensor([1, 1500, 16, -1])]; + tensor var_1107_cast_fp16 = reshape(shape = var_1106, x = linear_54_cast_fp16)[name = string("op_1107_cast_fp16")]; + tensor const_186_to_fp16 = const()[name = string("const_186_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1107_cast_fp16, y = const_186_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1113 = const()[name = string("op_1113"), val = tensor([1, 1500, 16, -1])]; + tensor var_1114_cast_fp16 = reshape(shape = var_1113, x = linear_55_cast_fp16)[name = string("op_1114_cast_fp16")]; + tensor const_187_to_fp16 = const()[name = string("const_187_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_1114_cast_fp16, y = const_187_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_1120 = const()[name = string("op_1120"), val = tensor([1, 1500, 16, -1])]; + tensor var_1121_cast_fp16 = reshape(shape = var_1120, x = linear_56_cast_fp16)[name = string("op_1121_cast_fp16")]; + tensor var_1122 = const()[name = string("op_1122"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = k_39_cast_fp16)[name = string("transpose_201")]; + tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = q_39_cast_fp16)[name = string("transpose_202")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_114, y = transpose_115)[name = string("qk_19_cast_fp16")]; + tensor var_1126_cast_fp16 = softmax(axis = var_1062, x = qk_19_cast_fp16)[name = string("op_1126_cast_fp16")]; + bool var_1128_transpose_x_0 = const()[name = string("op_1128_transpose_x_0"), val = bool(false)]; + bool var_1128_transpose_y_0 = const()[name = string("op_1128_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_1122, x = var_1121_cast_fp16)[name = string("transpose_203")]; + tensor var_1128_cast_fp16 = matmul(transpose_x = var_1128_transpose_x_0, transpose_y = var_1128_transpose_y_0, x = var_1126_cast_fp16, y = v_39_cast_fp16)[name = string("op_1128_cast_fp16")]; + tensor var_1129 = const()[name = string("op_1129"), val = tensor([0, 2, 1, 3])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([1, 1500, 1024])]; + tensor var_1130_cast_fp16 = transpose(perm = var_1129, x = var_1128_cast_fp16)[name = string("transpose_200")]; + tensor x_119_cast_fp16 = reshape(shape = concat_9, x = var_1130_cast_fp16)[name = string("x_119_cast_fp16")]; + tensor var_1134_to_fp16 = const()[name = string("op_1134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242883904)))]; + tensor var_1135_to_fp16 = const()[name = string("op_1135_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244981120)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1135_to_fp16, weight = var_1134_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1142_axes_0 = const()[name = string("op_1142_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244983232)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244985344)))]; + tensor var_1142_cast_fp16 = layer_norm(axes = var_1142_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1068_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1142_cast_fp16")]; + tensor var_1151_to_fp16 = const()[name = string("op_1151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244987456)))]; + tensor var_1152_to_fp16 = const()[name = string("op_1152_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253376128)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1152_to_fp16, weight = var_1151_to_fp16, x = var_1142_cast_fp16)[name = string("linear_58_cast_fp16")]; + string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")]; + tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1157_to_fp16 = const()[name = string("op_1157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253384384)))]; + tensor var_1158_to_fp16 = const()[name = string("op_1158_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261773056)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1158_to_fp16, weight = var_1157_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")]; + int32 var_1168 = const()[name = string("op_1168"), val = int32(-1)]; + tensor var_1184_axes_0 = const()[name = string("op_1184_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261775168)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261777280)))]; + fp16 var_1174_to_fp16 = const()[name = string("op_1174_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1184_cast_fp16 = layer_norm(axes = var_1184_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1174_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1184_cast_fp16")]; + tensor var_1195_to_fp16 = const()[name = string("op_1195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261779392)))]; + tensor var_1196_to_fp16 = const()[name = string("op_1196_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263876608)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1196_to_fp16, weight = var_1195_to_fp16, x = var_1184_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1199_to_fp16 = const()[name = string("op_1199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263878720)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1199_to_fp16, x = var_1184_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265975936)))]; + tensor var_1204_to_fp16 = const()[name = string("op_1204_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268073152)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1204_to_fp16, weight = var_1203_to_fp16, x = var_1184_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor var_1212 = const()[name = string("op_1212"), val = tensor([1, 1500, 16, -1])]; + tensor var_1213_cast_fp16 = reshape(shape = var_1212, x = linear_60_cast_fp16)[name = string("op_1213_cast_fp16")]; + tensor const_188_to_fp16 = const()[name = string("const_188_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1213_cast_fp16, y = const_188_to_fp16)[name = string("q_43_cast_fp16")]; + tensor var_1219 = const()[name = string("op_1219"), val = tensor([1, 1500, 16, -1])]; + tensor var_1220_cast_fp16 = reshape(shape = var_1219, x = linear_61_cast_fp16)[name = string("op_1220_cast_fp16")]; + tensor const_189_to_fp16 = const()[name = string("const_189_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_43_cast_fp16 = mul(x = var_1220_cast_fp16, y = const_189_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_1226 = const()[name = string("op_1226"), val = tensor([1, 1500, 16, -1])]; + tensor var_1227_cast_fp16 = reshape(shape = var_1226, x = linear_62_cast_fp16)[name = string("op_1227_cast_fp16")]; + tensor var_1228 = const()[name = string("op_1228"), val = tensor([0, 2, 1, 3])]; + bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)]; + bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)]; + tensor transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = k_43_cast_fp16)[name = string("transpose_197")]; + tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = q_43_cast_fp16)[name = string("transpose_198")]; + tensor qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_116, y = transpose_117)[name = string("qk_21_cast_fp16")]; + tensor var_1232_cast_fp16 = softmax(axis = var_1168, x = qk_21_cast_fp16)[name = string("op_1232_cast_fp16")]; + bool var_1234_transpose_x_0 = const()[name = string("op_1234_transpose_x_0"), val = bool(false)]; + bool var_1234_transpose_y_0 = const()[name = string("op_1234_transpose_y_0"), val = bool(false)]; + tensor v_43_cast_fp16 = transpose(perm = var_1228, x = var_1227_cast_fp16)[name = string("transpose_199")]; + tensor var_1234_cast_fp16 = matmul(transpose_x = var_1234_transpose_x_0, transpose_y = var_1234_transpose_y_0, x = var_1232_cast_fp16, y = v_43_cast_fp16)[name = string("op_1234_cast_fp16")]; + tensor var_1235 = const()[name = string("op_1235"), val = tensor([0, 2, 1, 3])]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([1, 1500, 1024])]; + tensor var_1236_cast_fp16 = transpose(perm = var_1235, x = var_1234_cast_fp16)[name = string("transpose_196")]; + tensor x_131_cast_fp16 = reshape(shape = concat_10, x = var_1236_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268075264)))]; + tensor var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270172480)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1241_to_fp16, weight = var_1240_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1248_axes_0 = const()[name = string("op_1248_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270174592)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270176704)))]; + tensor var_1248_cast_fp16 = layer_norm(axes = var_1248_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1174_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1248_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = string("op_1257_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270178816)))]; + tensor var_1258_to_fp16 = const()[name = string("op_1258_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278567488)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1258_to_fp16, weight = var_1257_to_fp16, x = var_1248_cast_fp16)[name = string("linear_64_cast_fp16")]; + string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")]; + tensor x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278575744)))]; + tensor var_1264_to_fp16 = const()[name = string("op_1264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286964416)))]; + tensor linear_65_cast_fp16 = linear(bias = var_1264_to_fp16, weight = var_1263_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")]; + int32 var_1274 = const()[name = string("op_1274"), val = int32(-1)]; + tensor var_1290_axes_0 = const()[name = string("op_1290_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286966528)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286968640)))]; + fp16 var_1280_to_fp16 = const()[name = string("op_1280_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1290_cast_fp16 = layer_norm(axes = var_1290_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1280_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1290_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = string("op_1301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286970752)))]; + tensor var_1302_to_fp16 = const()[name = string("op_1302_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289067968)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1302_to_fp16, weight = var_1301_to_fp16, x = var_1290_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1305_to_fp16 = const()[name = string("op_1305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289070080)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1305_to_fp16, x = var_1290_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1309_to_fp16 = const()[name = string("op_1309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291167296)))]; + tensor var_1310_to_fp16 = const()[name = string("op_1310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293264512)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1310_to_fp16, weight = var_1309_to_fp16, x = var_1290_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor var_1318 = const()[name = string("op_1318"), val = tensor([1, 1500, 16, -1])]; + tensor var_1319_cast_fp16 = reshape(shape = var_1318, x = linear_66_cast_fp16)[name = string("op_1319_cast_fp16")]; + tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1319_cast_fp16, y = const_190_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1325 = const()[name = string("op_1325"), val = tensor([1, 1500, 16, -1])]; + tensor var_1326_cast_fp16 = reshape(shape = var_1325, x = linear_67_cast_fp16)[name = string("op_1326_cast_fp16")]; + tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_47_cast_fp16 = mul(x = var_1326_cast_fp16, y = const_191_to_fp16)[name = string("k_47_cast_fp16")]; + tensor var_1332 = const()[name = string("op_1332"), val = tensor([1, 1500, 16, -1])]; + tensor var_1333_cast_fp16 = reshape(shape = var_1332, x = linear_68_cast_fp16)[name = string("op_1333_cast_fp16")]; + tensor var_1334 = const()[name = string("op_1334"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = k_47_cast_fp16)[name = string("transpose_193")]; + tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = q_47_cast_fp16)[name = string("transpose_194")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_118, y = transpose_119)[name = string("qk_23_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_1274, x = qk_23_cast_fp16)[name = string("op_1338_cast_fp16")]; + bool var_1340_transpose_x_0 = const()[name = string("op_1340_transpose_x_0"), val = bool(false)]; + bool var_1340_transpose_y_0 = const()[name = string("op_1340_transpose_y_0"), val = bool(false)]; + tensor v_47_cast_fp16 = transpose(perm = var_1334, x = var_1333_cast_fp16)[name = string("transpose_195")]; + tensor var_1340_cast_fp16 = matmul(transpose_x = var_1340_transpose_x_0, transpose_y = var_1340_transpose_y_0, x = var_1338_cast_fp16, y = v_47_cast_fp16)[name = string("op_1340_cast_fp16")]; + tensor var_1341 = const()[name = string("op_1341"), val = tensor([0, 2, 1, 3])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 1500, 1024])]; + tensor var_1342_cast_fp16 = transpose(perm = var_1341, x = var_1340_cast_fp16)[name = string("transpose_192")]; + tensor x_143_cast_fp16 = reshape(shape = concat_11, x = var_1342_cast_fp16)[name = string("x_143_cast_fp16")]; + tensor var_1346_to_fp16 = const()[name = string("op_1346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(293266624)))]; + tensor var_1347_to_fp16 = const()[name = string("op_1347_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295363840)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1347_to_fp16, weight = var_1346_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1354_axes_0 = const()[name = string("op_1354_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295365952)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295368064)))]; + tensor var_1354_cast_fp16 = layer_norm(axes = var_1354_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1280_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1354_cast_fp16")]; + tensor var_1363_to_fp16 = const()[name = string("op_1363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295370176)))]; + tensor var_1364_to_fp16 = const()[name = string("op_1364_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303758848)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1364_to_fp16, weight = var_1363_to_fp16, x = var_1354_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")]; + tensor x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_1369_to_fp16 = const()[name = string("op_1369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303767104)))]; + tensor var_1370_to_fp16 = const()[name = string("op_1370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312155776)))]; + tensor linear_71_cast_fp16 = linear(bias = var_1370_to_fp16, weight = var_1369_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_151_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_151_cast_fp16")]; + int32 var_1380 = const()[name = string("op_1380"), val = int32(-1)]; + tensor var_1396_axes_0 = const()[name = string("op_1396_axes_0"), val = tensor([-1])]; + tensor blocks_12_attn_ln_weight_to_fp16 = const()[name = string("blocks_12_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312157888)))]; + tensor blocks_12_attn_ln_bias_to_fp16 = const()[name = string("blocks_12_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312160000)))]; + fp16 var_1386_to_fp16 = const()[name = string("op_1386_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1396_cast_fp16 = layer_norm(axes = var_1396_axes_0, beta = blocks_12_attn_ln_bias_to_fp16, epsilon = var_1386_to_fp16, gamma = blocks_12_attn_ln_weight_to_fp16, x = x_151_cast_fp16)[name = string("op_1396_cast_fp16")]; + tensor var_1407_to_fp16 = const()[name = string("op_1407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312162112)))]; + tensor var_1408_to_fp16 = const()[name = string("op_1408_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314259328)))]; + tensor linear_72_cast_fp16 = linear(bias = var_1408_to_fp16, weight = var_1407_to_fp16, x = var_1396_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_1411_to_fp16 = const()[name = string("op_1411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314261440)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1411_to_fp16, x = var_1396_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_1415_to_fp16 = const()[name = string("op_1415_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316358656)))]; + tensor var_1416_to_fp16 = const()[name = string("op_1416_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318455872)))]; + tensor linear_74_cast_fp16 = linear(bias = var_1416_to_fp16, weight = var_1415_to_fp16, x = var_1396_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_1424 = const()[name = string("op_1424"), val = tensor([1, 1500, 16, -1])]; + tensor var_1425_cast_fp16 = reshape(shape = var_1424, x = linear_72_cast_fp16)[name = string("op_1425_cast_fp16")]; + tensor const_192_to_fp16 = const()[name = string("const_192_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1425_cast_fp16, y = const_192_to_fp16)[name = string("q_51_cast_fp16")]; + tensor var_1431 = const()[name = string("op_1431"), val = tensor([1, 1500, 16, -1])]; + tensor var_1432_cast_fp16 = reshape(shape = var_1431, x = linear_73_cast_fp16)[name = string("op_1432_cast_fp16")]; + tensor const_193_to_fp16 = const()[name = string("const_193_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_51_cast_fp16 = mul(x = var_1432_cast_fp16, y = const_193_to_fp16)[name = string("k_51_cast_fp16")]; + tensor var_1438 = const()[name = string("op_1438"), val = tensor([1, 1500, 16, -1])]; + tensor var_1439_cast_fp16 = reshape(shape = var_1438, x = linear_74_cast_fp16)[name = string("op_1439_cast_fp16")]; + tensor var_1440 = const()[name = string("op_1440"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = k_51_cast_fp16)[name = string("transpose_189")]; + tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = q_51_cast_fp16)[name = string("transpose_190")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_120, y = transpose_121)[name = string("qk_25_cast_fp16")]; + tensor var_1444_cast_fp16 = softmax(axis = var_1380, x = qk_25_cast_fp16)[name = string("op_1444_cast_fp16")]; + bool var_1446_transpose_x_0 = const()[name = string("op_1446_transpose_x_0"), val = bool(false)]; + bool var_1446_transpose_y_0 = const()[name = string("op_1446_transpose_y_0"), val = bool(false)]; + tensor v_51_cast_fp16 = transpose(perm = var_1440, x = var_1439_cast_fp16)[name = string("transpose_191")]; + tensor var_1446_cast_fp16 = matmul(transpose_x = var_1446_transpose_x_0, transpose_y = var_1446_transpose_y_0, x = var_1444_cast_fp16, y = v_51_cast_fp16)[name = string("op_1446_cast_fp16")]; + tensor var_1447 = const()[name = string("op_1447"), val = tensor([0, 2, 1, 3])]; + tensor concat_12 = const()[name = string("concat_12"), val = tensor([1, 1500, 1024])]; + tensor var_1448_cast_fp16 = transpose(perm = var_1447, x = var_1446_cast_fp16)[name = string("transpose_188")]; + tensor x_155_cast_fp16 = reshape(shape = concat_12, x = var_1448_cast_fp16)[name = string("x_155_cast_fp16")]; + tensor var_1452_to_fp16 = const()[name = string("op_1452_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318457984)))]; + tensor var_1453_to_fp16 = const()[name = string("op_1453_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320555200)))]; + tensor linear_75_cast_fp16 = linear(bias = var_1453_to_fp16, weight = var_1452_to_fp16, x = x_155_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_157_cast_fp16 = add(x = x_151_cast_fp16, y = linear_75_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1460_axes_0 = const()[name = string("op_1460_axes_0"), val = tensor([-1])]; + tensor blocks_12_mlp_ln_weight_to_fp16 = const()[name = string("blocks_12_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320557312)))]; + tensor blocks_12_mlp_ln_bias_to_fp16 = const()[name = string("blocks_12_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320559424)))]; + tensor var_1460_cast_fp16 = layer_norm(axes = var_1460_axes_0, beta = blocks_12_mlp_ln_bias_to_fp16, epsilon = var_1386_to_fp16, gamma = blocks_12_mlp_ln_weight_to_fp16, x = x_157_cast_fp16)[name = string("op_1460_cast_fp16")]; + tensor var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320561536)))]; + tensor var_1470_to_fp16 = const()[name = string("op_1470_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328950208)))]; + tensor linear_76_cast_fp16 = linear(bias = var_1470_to_fp16, weight = var_1469_to_fp16, x = var_1460_cast_fp16)[name = string("linear_76_cast_fp16")]; + string x_161_mode_0 = const()[name = string("x_161_mode_0"), val = string("EXACT")]; + tensor x_161_cast_fp16 = gelu(mode = x_161_mode_0, x = linear_76_cast_fp16)[name = string("x_161_cast_fp16")]; + tensor var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(328958464)))]; + tensor var_1476_to_fp16 = const()[name = string("op_1476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337347136)))]; + tensor linear_77_cast_fp16 = linear(bias = var_1476_to_fp16, weight = var_1475_to_fp16, x = x_161_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_163_cast_fp16 = add(x = x_157_cast_fp16, y = linear_77_cast_fp16)[name = string("x_163_cast_fp16")]; + int32 var_1486 = const()[name = string("op_1486"), val = int32(-1)]; + tensor var_1502_axes_0 = const()[name = string("op_1502_axes_0"), val = tensor([-1])]; + tensor blocks_13_attn_ln_weight_to_fp16 = const()[name = string("blocks_13_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337349248)))]; + tensor blocks_13_attn_ln_bias_to_fp16 = const()[name = string("blocks_13_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337351360)))]; + fp16 var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1502_cast_fp16 = layer_norm(axes = var_1502_axes_0, beta = blocks_13_attn_ln_bias_to_fp16, epsilon = var_1492_to_fp16, gamma = blocks_13_attn_ln_weight_to_fp16, x = x_163_cast_fp16)[name = string("op_1502_cast_fp16")]; + tensor var_1513_to_fp16 = const()[name = string("op_1513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337353472)))]; + tensor var_1514_to_fp16 = const()[name = string("op_1514_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339450688)))]; + tensor linear_78_cast_fp16 = linear(bias = var_1514_to_fp16, weight = var_1513_to_fp16, x = var_1502_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor var_1517_to_fp16 = const()[name = string("op_1517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339452800)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1517_to_fp16, x = var_1502_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor var_1521_to_fp16 = const()[name = string("op_1521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341550016)))]; + tensor var_1522_to_fp16 = const()[name = string("op_1522_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343647232)))]; + tensor linear_80_cast_fp16 = linear(bias = var_1522_to_fp16, weight = var_1521_to_fp16, x = var_1502_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_1530 = const()[name = string("op_1530"), val = tensor([1, 1500, 16, -1])]; + tensor var_1531_cast_fp16 = reshape(shape = var_1530, x = linear_78_cast_fp16)[name = string("op_1531_cast_fp16")]; + tensor const_194_to_fp16 = const()[name = string("const_194_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1531_cast_fp16, y = const_194_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1537 = const()[name = string("op_1537"), val = tensor([1, 1500, 16, -1])]; + tensor var_1538_cast_fp16 = reshape(shape = var_1537, x = linear_79_cast_fp16)[name = string("op_1538_cast_fp16")]; + tensor const_195_to_fp16 = const()[name = string("const_195_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1538_cast_fp16, y = const_195_to_fp16)[name = string("k_55_cast_fp16")]; + tensor var_1544 = const()[name = string("op_1544"), val = tensor([1, 1500, 16, -1])]; + tensor var_1545_cast_fp16 = reshape(shape = var_1544, x = linear_80_cast_fp16)[name = string("op_1545_cast_fp16")]; + tensor var_1546 = const()[name = string("op_1546"), val = tensor([0, 2, 1, 3])]; + bool qk_27_transpose_x_0 = const()[name = string("qk_27_transpose_x_0"), val = bool(false)]; + bool qk_27_transpose_y_0 = const()[name = string("qk_27_transpose_y_0"), val = bool(false)]; + tensor transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = k_55_cast_fp16)[name = string("transpose_185")]; + tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = q_55_cast_fp16)[name = string("transpose_186")]; + tensor qk_27_cast_fp16 = matmul(transpose_x = qk_27_transpose_x_0, transpose_y = qk_27_transpose_y_0, x = transpose_122, y = transpose_123)[name = string("qk_27_cast_fp16")]; + tensor var_1550_cast_fp16 = softmax(axis = var_1486, x = qk_27_cast_fp16)[name = string("op_1550_cast_fp16")]; + bool var_1552_transpose_x_0 = const()[name = string("op_1552_transpose_x_0"), val = bool(false)]; + bool var_1552_transpose_y_0 = const()[name = string("op_1552_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1546, x = var_1545_cast_fp16)[name = string("transpose_187")]; + tensor var_1552_cast_fp16 = matmul(transpose_x = var_1552_transpose_x_0, transpose_y = var_1552_transpose_y_0, x = var_1550_cast_fp16, y = v_55_cast_fp16)[name = string("op_1552_cast_fp16")]; + tensor var_1553 = const()[name = string("op_1553"), val = tensor([0, 2, 1, 3])]; + tensor concat_13 = const()[name = string("concat_13"), val = tensor([1, 1500, 1024])]; + tensor var_1554_cast_fp16 = transpose(perm = var_1553, x = var_1552_cast_fp16)[name = string("transpose_184")]; + tensor x_167_cast_fp16 = reshape(shape = concat_13, x = var_1554_cast_fp16)[name = string("x_167_cast_fp16")]; + tensor var_1558_to_fp16 = const()[name = string("op_1558_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343649344)))]; + tensor var_1559_to_fp16 = const()[name = string("op_1559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345746560)))]; + tensor linear_81_cast_fp16 = linear(bias = var_1559_to_fp16, weight = var_1558_to_fp16, x = x_167_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor x_169_cast_fp16 = add(x = x_163_cast_fp16, y = linear_81_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_1566_axes_0 = const()[name = string("op_1566_axes_0"), val = tensor([-1])]; + tensor blocks_13_mlp_ln_weight_to_fp16 = const()[name = string("blocks_13_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345748672)))]; + tensor blocks_13_mlp_ln_bias_to_fp16 = const()[name = string("blocks_13_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345750784)))]; + tensor var_1566_cast_fp16 = layer_norm(axes = var_1566_axes_0, beta = blocks_13_mlp_ln_bias_to_fp16, epsilon = var_1492_to_fp16, gamma = blocks_13_mlp_ln_weight_to_fp16, x = x_169_cast_fp16)[name = string("op_1566_cast_fp16")]; + tensor var_1575_to_fp16 = const()[name = string("op_1575_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345752896)))]; + tensor var_1576_to_fp16 = const()[name = string("op_1576_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354141568)))]; + tensor linear_82_cast_fp16 = linear(bias = var_1576_to_fp16, weight = var_1575_to_fp16, x = var_1566_cast_fp16)[name = string("linear_82_cast_fp16")]; + string x_173_mode_0 = const()[name = string("x_173_mode_0"), val = string("EXACT")]; + tensor x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = linear_82_cast_fp16)[name = string("x_173_cast_fp16")]; + tensor var_1581_to_fp16 = const()[name = string("op_1581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354149824)))]; + tensor var_1582_to_fp16 = const()[name = string("op_1582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362538496)))]; + tensor linear_83_cast_fp16 = linear(bias = var_1582_to_fp16, weight = var_1581_to_fp16, x = x_173_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_175_cast_fp16 = add(x = x_169_cast_fp16, y = linear_83_cast_fp16)[name = string("x_175_cast_fp16")]; + int32 var_1592 = const()[name = string("op_1592"), val = int32(-1)]; + tensor var_1608_axes_0 = const()[name = string("op_1608_axes_0"), val = tensor([-1])]; + tensor blocks_14_attn_ln_weight_to_fp16 = const()[name = string("blocks_14_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362540608)))]; + tensor blocks_14_attn_ln_bias_to_fp16 = const()[name = string("blocks_14_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362542720)))]; + fp16 var_1598_to_fp16 = const()[name = string("op_1598_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1608_cast_fp16 = layer_norm(axes = var_1608_axes_0, beta = blocks_14_attn_ln_bias_to_fp16, epsilon = var_1598_to_fp16, gamma = blocks_14_attn_ln_weight_to_fp16, x = x_175_cast_fp16)[name = string("op_1608_cast_fp16")]; + tensor var_1619_to_fp16 = const()[name = string("op_1619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362544832)))]; + tensor var_1620_to_fp16 = const()[name = string("op_1620_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364642048)))]; + tensor linear_84_cast_fp16 = linear(bias = var_1620_to_fp16, weight = var_1619_to_fp16, x = var_1608_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor var_1623_to_fp16 = const()[name = string("op_1623_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364644160)))]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1623_to_fp16, x = var_1608_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor var_1627_to_fp16 = const()[name = string("op_1627_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366741376)))]; + tensor var_1628_to_fp16 = const()[name = string("op_1628_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368838592)))]; + tensor linear_86_cast_fp16 = linear(bias = var_1628_to_fp16, weight = var_1627_to_fp16, x = var_1608_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor var_1636 = const()[name = string("op_1636"), val = tensor([1, 1500, 16, -1])]; + tensor var_1637_cast_fp16 = reshape(shape = var_1636, x = linear_84_cast_fp16)[name = string("op_1637_cast_fp16")]; + tensor const_196_to_fp16 = const()[name = string("const_196_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1637_cast_fp16, y = const_196_to_fp16)[name = string("q_59_cast_fp16")]; + tensor var_1643 = const()[name = string("op_1643"), val = tensor([1, 1500, 16, -1])]; + tensor var_1644_cast_fp16 = reshape(shape = var_1643, x = linear_85_cast_fp16)[name = string("op_1644_cast_fp16")]; + tensor const_197_to_fp16 = const()[name = string("const_197_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1644_cast_fp16, y = const_197_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1650 = const()[name = string("op_1650"), val = tensor([1, 1500, 16, -1])]; + tensor var_1651_cast_fp16 = reshape(shape = var_1650, x = linear_86_cast_fp16)[name = string("op_1651_cast_fp16")]; + tensor var_1652 = const()[name = string("op_1652"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = k_59_cast_fp16)[name = string("transpose_181")]; + tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = q_59_cast_fp16)[name = string("transpose_182")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_124, y = transpose_125)[name = string("qk_29_cast_fp16")]; + tensor var_1656_cast_fp16 = softmax(axis = var_1592, x = qk_29_cast_fp16)[name = string("op_1656_cast_fp16")]; + bool var_1658_transpose_x_0 = const()[name = string("op_1658_transpose_x_0"), val = bool(false)]; + bool var_1658_transpose_y_0 = const()[name = string("op_1658_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1652, x = var_1651_cast_fp16)[name = string("transpose_183")]; + tensor var_1658_cast_fp16 = matmul(transpose_x = var_1658_transpose_x_0, transpose_y = var_1658_transpose_y_0, x = var_1656_cast_fp16, y = v_59_cast_fp16)[name = string("op_1658_cast_fp16")]; + tensor var_1659 = const()[name = string("op_1659"), val = tensor([0, 2, 1, 3])]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 1500, 1024])]; + tensor var_1660_cast_fp16 = transpose(perm = var_1659, x = var_1658_cast_fp16)[name = string("transpose_180")]; + tensor x_179_cast_fp16 = reshape(shape = concat_14, x = var_1660_cast_fp16)[name = string("x_179_cast_fp16")]; + tensor var_1664_to_fp16 = const()[name = string("op_1664_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368840704)))]; + tensor var_1665_to_fp16 = const()[name = string("op_1665_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370937920)))]; + tensor linear_87_cast_fp16 = linear(bias = var_1665_to_fp16, weight = var_1664_to_fp16, x = x_179_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_87_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_1672_axes_0 = const()[name = string("op_1672_axes_0"), val = tensor([-1])]; + tensor blocks_14_mlp_ln_weight_to_fp16 = const()[name = string("blocks_14_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370940032)))]; + tensor blocks_14_mlp_ln_bias_to_fp16 = const()[name = string("blocks_14_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370942144)))]; + tensor var_1672_cast_fp16 = layer_norm(axes = var_1672_axes_0, beta = blocks_14_mlp_ln_bias_to_fp16, epsilon = var_1598_to_fp16, gamma = blocks_14_mlp_ln_weight_to_fp16, x = x_181_cast_fp16)[name = string("op_1672_cast_fp16")]; + tensor var_1681_to_fp16 = const()[name = string("op_1681_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370944256)))]; + tensor var_1682_to_fp16 = const()[name = string("op_1682_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379332928)))]; + tensor linear_88_cast_fp16 = linear(bias = var_1682_to_fp16, weight = var_1681_to_fp16, x = var_1672_cast_fp16)[name = string("linear_88_cast_fp16")]; + string x_185_mode_0 = const()[name = string("x_185_mode_0"), val = string("EXACT")]; + tensor x_185_cast_fp16 = gelu(mode = x_185_mode_0, x = linear_88_cast_fp16)[name = string("x_185_cast_fp16")]; + tensor var_1687_to_fp16 = const()[name = string("op_1687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379341184)))]; + tensor var_1688_to_fp16 = const()[name = string("op_1688_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387729856)))]; + tensor linear_89_cast_fp16 = linear(bias = var_1688_to_fp16, weight = var_1687_to_fp16, x = x_185_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor x_187_cast_fp16 = add(x = x_181_cast_fp16, y = linear_89_cast_fp16)[name = string("x_187_cast_fp16")]; + int32 var_1698 = const()[name = string("op_1698"), val = int32(-1)]; + tensor var_1714_axes_0 = const()[name = string("op_1714_axes_0"), val = tensor([-1])]; + tensor blocks_15_attn_ln_weight_to_fp16 = const()[name = string("blocks_15_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387731968)))]; + tensor blocks_15_attn_ln_bias_to_fp16 = const()[name = string("blocks_15_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387734080)))]; + fp16 var_1704_to_fp16 = const()[name = string("op_1704_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1714_cast_fp16 = layer_norm(axes = var_1714_axes_0, beta = blocks_15_attn_ln_bias_to_fp16, epsilon = var_1704_to_fp16, gamma = blocks_15_attn_ln_weight_to_fp16, x = x_187_cast_fp16)[name = string("op_1714_cast_fp16")]; + tensor var_1725_to_fp16 = const()[name = string("op_1725_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387736192)))]; + tensor var_1726_to_fp16 = const()[name = string("op_1726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389833408)))]; + tensor linear_90_cast_fp16 = linear(bias = var_1726_to_fp16, weight = var_1725_to_fp16, x = var_1714_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_1729_to_fp16 = const()[name = string("op_1729_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389835520)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1729_to_fp16, x = var_1714_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor var_1733_to_fp16 = const()[name = string("op_1733_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391932736)))]; + tensor var_1734_to_fp16 = const()[name = string("op_1734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394029952)))]; + tensor linear_92_cast_fp16 = linear(bias = var_1734_to_fp16, weight = var_1733_to_fp16, x = var_1714_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor var_1742 = const()[name = string("op_1742"), val = tensor([1, 1500, 16, -1])]; + tensor var_1743_cast_fp16 = reshape(shape = var_1742, x = linear_90_cast_fp16)[name = string("op_1743_cast_fp16")]; + tensor const_198_to_fp16 = const()[name = string("const_198_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1743_cast_fp16, y = const_198_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1749 = const()[name = string("op_1749"), val = tensor([1, 1500, 16, -1])]; + tensor var_1750_cast_fp16 = reshape(shape = var_1749, x = linear_91_cast_fp16)[name = string("op_1750_cast_fp16")]; + tensor const_199_to_fp16 = const()[name = string("const_199_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_63_cast_fp16 = mul(x = var_1750_cast_fp16, y = const_199_to_fp16)[name = string("k_63_cast_fp16")]; + tensor var_1756 = const()[name = string("op_1756"), val = tensor([1, 1500, 16, -1])]; + tensor var_1757_cast_fp16 = reshape(shape = var_1756, x = linear_92_cast_fp16)[name = string("op_1757_cast_fp16")]; + tensor var_1758 = const()[name = string("op_1758"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = k_63_cast_fp16)[name = string("transpose_177")]; + tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = q_63_cast_fp16)[name = string("transpose_178")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_126, y = transpose_127)[name = string("qk_31_cast_fp16")]; + tensor var_1762_cast_fp16 = softmax(axis = var_1698, x = qk_31_cast_fp16)[name = string("op_1762_cast_fp16")]; + bool var_1764_transpose_x_0 = const()[name = string("op_1764_transpose_x_0"), val = bool(false)]; + bool var_1764_transpose_y_0 = const()[name = string("op_1764_transpose_y_0"), val = bool(false)]; + tensor v_63_cast_fp16 = transpose(perm = var_1758, x = var_1757_cast_fp16)[name = string("transpose_179")]; + tensor var_1764_cast_fp16 = matmul(transpose_x = var_1764_transpose_x_0, transpose_y = var_1764_transpose_y_0, x = var_1762_cast_fp16, y = v_63_cast_fp16)[name = string("op_1764_cast_fp16")]; + tensor var_1765 = const()[name = string("op_1765"), val = tensor([0, 2, 1, 3])]; + tensor concat_15 = const()[name = string("concat_15"), val = tensor([1, 1500, 1024])]; + tensor var_1766_cast_fp16 = transpose(perm = var_1765, x = var_1764_cast_fp16)[name = string("transpose_176")]; + tensor x_191_cast_fp16 = reshape(shape = concat_15, x = var_1766_cast_fp16)[name = string("x_191_cast_fp16")]; + tensor var_1770_to_fp16 = const()[name = string("op_1770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(394032064)))]; + tensor var_1771_to_fp16 = const()[name = string("op_1771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396129280)))]; + tensor linear_93_cast_fp16 = linear(bias = var_1771_to_fp16, weight = var_1770_to_fp16, x = x_191_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_193_cast_fp16 = add(x = x_187_cast_fp16, y = linear_93_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_1778_axes_0 = const()[name = string("op_1778_axes_0"), val = tensor([-1])]; + tensor blocks_15_mlp_ln_weight_to_fp16 = const()[name = string("blocks_15_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396131392)))]; + tensor blocks_15_mlp_ln_bias_to_fp16 = const()[name = string("blocks_15_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396133504)))]; + tensor var_1778_cast_fp16 = layer_norm(axes = var_1778_axes_0, beta = blocks_15_mlp_ln_bias_to_fp16, epsilon = var_1704_to_fp16, gamma = blocks_15_mlp_ln_weight_to_fp16, x = x_193_cast_fp16)[name = string("op_1778_cast_fp16")]; + tensor var_1787_to_fp16 = const()[name = string("op_1787_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396135616)))]; + tensor var_1788_to_fp16 = const()[name = string("op_1788_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404524288)))]; + tensor linear_94_cast_fp16 = linear(bias = var_1788_to_fp16, weight = var_1787_to_fp16, x = var_1778_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_197_mode_0 = const()[name = string("x_197_mode_0"), val = string("EXACT")]; + tensor x_197_cast_fp16 = gelu(mode = x_197_mode_0, x = linear_94_cast_fp16)[name = string("x_197_cast_fp16")]; + tensor var_1793_to_fp16 = const()[name = string("op_1793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404532544)))]; + tensor var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412921216)))]; + tensor linear_95_cast_fp16 = linear(bias = var_1794_to_fp16, weight = var_1793_to_fp16, x = x_197_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor x_199_cast_fp16 = add(x = x_193_cast_fp16, y = linear_95_cast_fp16)[name = string("x_199_cast_fp16")]; + int32 var_1804 = const()[name = string("op_1804"), val = int32(-1)]; + tensor var_1820_axes_0 = const()[name = string("op_1820_axes_0"), val = tensor([-1])]; + tensor blocks_16_attn_ln_weight_to_fp16 = const()[name = string("blocks_16_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412923328)))]; + tensor blocks_16_attn_ln_bias_to_fp16 = const()[name = string("blocks_16_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412925440)))]; + fp16 var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1820_cast_fp16 = layer_norm(axes = var_1820_axes_0, beta = blocks_16_attn_ln_bias_to_fp16, epsilon = var_1810_to_fp16, gamma = blocks_16_attn_ln_weight_to_fp16, x = x_199_cast_fp16)[name = string("op_1820_cast_fp16")]; + tensor var_1831_to_fp16 = const()[name = string("op_1831_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412927552)))]; + tensor var_1832_to_fp16 = const()[name = string("op_1832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415024768)))]; + tensor linear_96_cast_fp16 = linear(bias = var_1832_to_fp16, weight = var_1831_to_fp16, x = var_1820_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor var_1835_to_fp16 = const()[name = string("op_1835_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415026880)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1835_to_fp16, x = var_1820_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor var_1839_to_fp16 = const()[name = string("op_1839_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417124096)))]; + tensor var_1840_to_fp16 = const()[name = string("op_1840_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419221312)))]; + tensor linear_98_cast_fp16 = linear(bias = var_1840_to_fp16, weight = var_1839_to_fp16, x = var_1820_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor var_1848 = const()[name = string("op_1848"), val = tensor([1, 1500, 16, -1])]; + tensor var_1849_cast_fp16 = reshape(shape = var_1848, x = linear_96_cast_fp16)[name = string("op_1849_cast_fp16")]; + tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1849_cast_fp16, y = const_200_to_fp16)[name = string("q_67_cast_fp16")]; + tensor var_1855 = const()[name = string("op_1855"), val = tensor([1, 1500, 16, -1])]; + tensor var_1856_cast_fp16 = reshape(shape = var_1855, x = linear_97_cast_fp16)[name = string("op_1856_cast_fp16")]; + tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_67_cast_fp16 = mul(x = var_1856_cast_fp16, y = const_201_to_fp16)[name = string("k_67_cast_fp16")]; + tensor var_1862 = const()[name = string("op_1862"), val = tensor([1, 1500, 16, -1])]; + tensor var_1863_cast_fp16 = reshape(shape = var_1862, x = linear_98_cast_fp16)[name = string("op_1863_cast_fp16")]; + tensor var_1864 = const()[name = string("op_1864"), val = tensor([0, 2, 1, 3])]; + bool qk_33_transpose_x_0 = const()[name = string("qk_33_transpose_x_0"), val = bool(false)]; + bool qk_33_transpose_y_0 = const()[name = string("qk_33_transpose_y_0"), val = bool(false)]; + tensor transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = k_67_cast_fp16)[name = string("transpose_173")]; + tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = q_67_cast_fp16)[name = string("transpose_174")]; + tensor qk_33_cast_fp16 = matmul(transpose_x = qk_33_transpose_x_0, transpose_y = qk_33_transpose_y_0, x = transpose_128, y = transpose_129)[name = string("qk_33_cast_fp16")]; + tensor var_1868_cast_fp16 = softmax(axis = var_1804, x = qk_33_cast_fp16)[name = string("op_1868_cast_fp16")]; + bool var_1870_transpose_x_0 = const()[name = string("op_1870_transpose_x_0"), val = bool(false)]; + bool var_1870_transpose_y_0 = const()[name = string("op_1870_transpose_y_0"), val = bool(false)]; + tensor v_67_cast_fp16 = transpose(perm = var_1864, x = var_1863_cast_fp16)[name = string("transpose_175")]; + tensor var_1870_cast_fp16 = matmul(transpose_x = var_1870_transpose_x_0, transpose_y = var_1870_transpose_y_0, x = var_1868_cast_fp16, y = v_67_cast_fp16)[name = string("op_1870_cast_fp16")]; + tensor var_1871 = const()[name = string("op_1871"), val = tensor([0, 2, 1, 3])]; + tensor concat_16 = const()[name = string("concat_16"), val = tensor([1, 1500, 1024])]; + tensor var_1872_cast_fp16 = transpose(perm = var_1871, x = var_1870_cast_fp16)[name = string("transpose_172")]; + tensor x_203_cast_fp16 = reshape(shape = concat_16, x = var_1872_cast_fp16)[name = string("x_203_cast_fp16")]; + tensor var_1876_to_fp16 = const()[name = string("op_1876_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419223424)))]; + tensor var_1877_to_fp16 = const()[name = string("op_1877_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421320640)))]; + tensor linear_99_cast_fp16 = linear(bias = var_1877_to_fp16, weight = var_1876_to_fp16, x = x_203_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor x_205_cast_fp16 = add(x = x_199_cast_fp16, y = linear_99_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_1884_axes_0 = const()[name = string("op_1884_axes_0"), val = tensor([-1])]; + tensor blocks_16_mlp_ln_weight_to_fp16 = const()[name = string("blocks_16_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421322752)))]; + tensor blocks_16_mlp_ln_bias_to_fp16 = const()[name = string("blocks_16_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421324864)))]; + tensor var_1884_cast_fp16 = layer_norm(axes = var_1884_axes_0, beta = blocks_16_mlp_ln_bias_to_fp16, epsilon = var_1810_to_fp16, gamma = blocks_16_mlp_ln_weight_to_fp16, x = x_205_cast_fp16)[name = string("op_1884_cast_fp16")]; + tensor var_1893_to_fp16 = const()[name = string("op_1893_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421326976)))]; + tensor var_1894_to_fp16 = const()[name = string("op_1894_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429715648)))]; + tensor linear_100_cast_fp16 = linear(bias = var_1894_to_fp16, weight = var_1893_to_fp16, x = var_1884_cast_fp16)[name = string("linear_100_cast_fp16")]; + string x_209_mode_0 = const()[name = string("x_209_mode_0"), val = string("EXACT")]; + tensor x_209_cast_fp16 = gelu(mode = x_209_mode_0, x = linear_100_cast_fp16)[name = string("x_209_cast_fp16")]; + tensor var_1899_to_fp16 = const()[name = string("op_1899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429723904)))]; + tensor var_1900_to_fp16 = const()[name = string("op_1900_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438112576)))]; + tensor linear_101_cast_fp16 = linear(bias = var_1900_to_fp16, weight = var_1899_to_fp16, x = x_209_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor x_211_cast_fp16 = add(x = x_205_cast_fp16, y = linear_101_cast_fp16)[name = string("x_211_cast_fp16")]; + int32 var_1910 = const()[name = string("op_1910"), val = int32(-1)]; + tensor var_1926_axes_0 = const()[name = string("op_1926_axes_0"), val = tensor([-1])]; + tensor blocks_17_attn_ln_weight_to_fp16 = const()[name = string("blocks_17_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438114688)))]; + tensor blocks_17_attn_ln_bias_to_fp16 = const()[name = string("blocks_17_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438116800)))]; + fp16 var_1916_to_fp16 = const()[name = string("op_1916_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1926_cast_fp16 = layer_norm(axes = var_1926_axes_0, beta = blocks_17_attn_ln_bias_to_fp16, epsilon = var_1916_to_fp16, gamma = blocks_17_attn_ln_weight_to_fp16, x = x_211_cast_fp16)[name = string("op_1926_cast_fp16")]; + tensor var_1937_to_fp16 = const()[name = string("op_1937_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438118912)))]; + tensor var_1938_to_fp16 = const()[name = string("op_1938_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440216128)))]; + tensor linear_102_cast_fp16 = linear(bias = var_1938_to_fp16, weight = var_1937_to_fp16, x = var_1926_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_1941_to_fp16 = const()[name = string("op_1941_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440218240)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1941_to_fp16, x = var_1926_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor var_1945_to_fp16 = const()[name = string("op_1945_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442315456)))]; + tensor var_1946_to_fp16 = const()[name = string("op_1946_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444412672)))]; + tensor linear_104_cast_fp16 = linear(bias = var_1946_to_fp16, weight = var_1945_to_fp16, x = var_1926_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor var_1954 = const()[name = string("op_1954"), val = tensor([1, 1500, 16, -1])]; + tensor var_1955_cast_fp16 = reshape(shape = var_1954, x = linear_102_cast_fp16)[name = string("op_1955_cast_fp16")]; + tensor const_202_to_fp16 = const()[name = string("const_202_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_1955_cast_fp16, y = const_202_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_1961 = const()[name = string("op_1961"), val = tensor([1, 1500, 16, -1])]; + tensor var_1962_cast_fp16 = reshape(shape = var_1961, x = linear_103_cast_fp16)[name = string("op_1962_cast_fp16")]; + tensor const_203_to_fp16 = const()[name = string("const_203_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_71_cast_fp16 = mul(x = var_1962_cast_fp16, y = const_203_to_fp16)[name = string("k_71_cast_fp16")]; + tensor var_1968 = const()[name = string("op_1968"), val = tensor([1, 1500, 16, -1])]; + tensor var_1969_cast_fp16 = reshape(shape = var_1968, x = linear_104_cast_fp16)[name = string("op_1969_cast_fp16")]; + tensor var_1970 = const()[name = string("op_1970"), val = tensor([0, 2, 1, 3])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = k_71_cast_fp16)[name = string("transpose_169")]; + tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = q_71_cast_fp16)[name = string("transpose_170")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_130, y = transpose_131)[name = string("qk_35_cast_fp16")]; + tensor var_1974_cast_fp16 = softmax(axis = var_1910, x = qk_35_cast_fp16)[name = string("op_1974_cast_fp16")]; + bool var_1976_transpose_x_0 = const()[name = string("op_1976_transpose_x_0"), val = bool(false)]; + bool var_1976_transpose_y_0 = const()[name = string("op_1976_transpose_y_0"), val = bool(false)]; + tensor v_71_cast_fp16 = transpose(perm = var_1970, x = var_1969_cast_fp16)[name = string("transpose_171")]; + tensor var_1976_cast_fp16 = matmul(transpose_x = var_1976_transpose_x_0, transpose_y = var_1976_transpose_y_0, x = var_1974_cast_fp16, y = v_71_cast_fp16)[name = string("op_1976_cast_fp16")]; + tensor var_1977 = const()[name = string("op_1977"), val = tensor([0, 2, 1, 3])]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([1, 1500, 1024])]; + tensor var_1978_cast_fp16 = transpose(perm = var_1977, x = var_1976_cast_fp16)[name = string("transpose_168")]; + tensor x_215_cast_fp16 = reshape(shape = concat_17, x = var_1978_cast_fp16)[name = string("x_215_cast_fp16")]; + tensor var_1982_to_fp16 = const()[name = string("op_1982_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(444414784)))]; + tensor var_1983_to_fp16 = const()[name = string("op_1983_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446512000)))]; + tensor linear_105_cast_fp16 = linear(bias = var_1983_to_fp16, weight = var_1982_to_fp16, x = x_215_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor x_217_cast_fp16 = add(x = x_211_cast_fp16, y = linear_105_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_1990_axes_0 = const()[name = string("op_1990_axes_0"), val = tensor([-1])]; + tensor blocks_17_mlp_ln_weight_to_fp16 = const()[name = string("blocks_17_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446514112)))]; + tensor blocks_17_mlp_ln_bias_to_fp16 = const()[name = string("blocks_17_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446516224)))]; + tensor var_1990_cast_fp16 = layer_norm(axes = var_1990_axes_0, beta = blocks_17_mlp_ln_bias_to_fp16, epsilon = var_1916_to_fp16, gamma = blocks_17_mlp_ln_weight_to_fp16, x = x_217_cast_fp16)[name = string("op_1990_cast_fp16")]; + tensor var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446518336)))]; + tensor var_2000_to_fp16 = const()[name = string("op_2000_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454907008)))]; + tensor linear_106_cast_fp16 = linear(bias = var_2000_to_fp16, weight = var_1999_to_fp16, x = var_1990_cast_fp16)[name = string("linear_106_cast_fp16")]; + string x_221_mode_0 = const()[name = string("x_221_mode_0"), val = string("EXACT")]; + tensor x_221_cast_fp16 = gelu(mode = x_221_mode_0, x = linear_106_cast_fp16)[name = string("x_221_cast_fp16")]; + tensor var_2005_to_fp16 = const()[name = string("op_2005_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454915264)))]; + tensor var_2006_to_fp16 = const()[name = string("op_2006_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463303936)))]; + tensor linear_107_cast_fp16 = linear(bias = var_2006_to_fp16, weight = var_2005_to_fp16, x = x_221_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor x_223_cast_fp16 = add(x = x_217_cast_fp16, y = linear_107_cast_fp16)[name = string("x_223_cast_fp16")]; + int32 var_2016 = const()[name = string("op_2016"), val = int32(-1)]; + tensor var_2032_axes_0 = const()[name = string("op_2032_axes_0"), val = tensor([-1])]; + tensor blocks_18_attn_ln_weight_to_fp16 = const()[name = string("blocks_18_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463306048)))]; + tensor blocks_18_attn_ln_bias_to_fp16 = const()[name = string("blocks_18_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463308160)))]; + fp16 var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2032_cast_fp16 = layer_norm(axes = var_2032_axes_0, beta = blocks_18_attn_ln_bias_to_fp16, epsilon = var_2022_to_fp16, gamma = blocks_18_attn_ln_weight_to_fp16, x = x_223_cast_fp16)[name = string("op_2032_cast_fp16")]; + tensor var_2043_to_fp16 = const()[name = string("op_2043_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463310272)))]; + tensor var_2044_to_fp16 = const()[name = string("op_2044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465407488)))]; + tensor linear_108_cast_fp16 = linear(bias = var_2044_to_fp16, weight = var_2043_to_fp16, x = var_2032_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor var_2047_to_fp16 = const()[name = string("op_2047_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465409600)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2047_to_fp16, x = var_2032_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_2051_to_fp16 = const()[name = string("op_2051_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467506816)))]; + tensor var_2052_to_fp16 = const()[name = string("op_2052_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469604032)))]; + tensor linear_110_cast_fp16 = linear(bias = var_2052_to_fp16, weight = var_2051_to_fp16, x = var_2032_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor var_2060 = const()[name = string("op_2060"), val = tensor([1, 1500, 16, -1])]; + tensor var_2061_cast_fp16 = reshape(shape = var_2060, x = linear_108_cast_fp16)[name = string("op_2061_cast_fp16")]; + tensor const_204_to_fp16 = const()[name = string("const_204_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2061_cast_fp16, y = const_204_to_fp16)[name = string("q_75_cast_fp16")]; + tensor var_2067 = const()[name = string("op_2067"), val = tensor([1, 1500, 16, -1])]; + tensor var_2068_cast_fp16 = reshape(shape = var_2067, x = linear_109_cast_fp16)[name = string("op_2068_cast_fp16")]; + tensor const_205_to_fp16 = const()[name = string("const_205_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_2068_cast_fp16, y = const_205_to_fp16)[name = string("k_75_cast_fp16")]; + tensor var_2074 = const()[name = string("op_2074"), val = tensor([1, 1500, 16, -1])]; + tensor var_2075_cast_fp16 = reshape(shape = var_2074, x = linear_110_cast_fp16)[name = string("op_2075_cast_fp16")]; + tensor var_2076 = const()[name = string("op_2076"), val = tensor([0, 2, 1, 3])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = k_75_cast_fp16)[name = string("transpose_165")]; + tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = q_75_cast_fp16)[name = string("transpose_166")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_132, y = transpose_133)[name = string("qk_37_cast_fp16")]; + tensor var_2080_cast_fp16 = softmax(axis = var_2016, x = qk_37_cast_fp16)[name = string("op_2080_cast_fp16")]; + bool var_2082_transpose_x_0 = const()[name = string("op_2082_transpose_x_0"), val = bool(false)]; + bool var_2082_transpose_y_0 = const()[name = string("op_2082_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_2076, x = var_2075_cast_fp16)[name = string("transpose_167")]; + tensor var_2082_cast_fp16 = matmul(transpose_x = var_2082_transpose_x_0, transpose_y = var_2082_transpose_y_0, x = var_2080_cast_fp16, y = v_75_cast_fp16)[name = string("op_2082_cast_fp16")]; + tensor var_2083 = const()[name = string("op_2083"), val = tensor([0, 2, 1, 3])]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([1, 1500, 1024])]; + tensor var_2084_cast_fp16 = transpose(perm = var_2083, x = var_2082_cast_fp16)[name = string("transpose_164")]; + tensor x_227_cast_fp16 = reshape(shape = concat_18, x = var_2084_cast_fp16)[name = string("x_227_cast_fp16")]; + tensor var_2088_to_fp16 = const()[name = string("op_2088_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(469606144)))]; + tensor var_2089_to_fp16 = const()[name = string("op_2089_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471703360)))]; + tensor linear_111_cast_fp16 = linear(bias = var_2089_to_fp16, weight = var_2088_to_fp16, x = x_227_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor x_229_cast_fp16 = add(x = x_223_cast_fp16, y = linear_111_cast_fp16)[name = string("x_229_cast_fp16")]; + tensor var_2096_axes_0 = const()[name = string("op_2096_axes_0"), val = tensor([-1])]; + tensor blocks_18_mlp_ln_weight_to_fp16 = const()[name = string("blocks_18_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471705472)))]; + tensor blocks_18_mlp_ln_bias_to_fp16 = const()[name = string("blocks_18_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471707584)))]; + tensor var_2096_cast_fp16 = layer_norm(axes = var_2096_axes_0, beta = blocks_18_mlp_ln_bias_to_fp16, epsilon = var_2022_to_fp16, gamma = blocks_18_mlp_ln_weight_to_fp16, x = x_229_cast_fp16)[name = string("op_2096_cast_fp16")]; + tensor var_2105_to_fp16 = const()[name = string("op_2105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471709696)))]; + tensor var_2106_to_fp16 = const()[name = string("op_2106_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480098368)))]; + tensor linear_112_cast_fp16 = linear(bias = var_2106_to_fp16, weight = var_2105_to_fp16, x = var_2096_cast_fp16)[name = string("linear_112_cast_fp16")]; + string x_233_mode_0 = const()[name = string("x_233_mode_0"), val = string("EXACT")]; + tensor x_233_cast_fp16 = gelu(mode = x_233_mode_0, x = linear_112_cast_fp16)[name = string("x_233_cast_fp16")]; + tensor var_2111_to_fp16 = const()[name = string("op_2111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480106624)))]; + tensor var_2112_to_fp16 = const()[name = string("op_2112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488495296)))]; + tensor linear_113_cast_fp16 = linear(bias = var_2112_to_fp16, weight = var_2111_to_fp16, x = x_233_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor x_235_cast_fp16 = add(x = x_229_cast_fp16, y = linear_113_cast_fp16)[name = string("x_235_cast_fp16")]; + int32 var_2122 = const()[name = string("op_2122"), val = int32(-1)]; + tensor var_2138_axes_0 = const()[name = string("op_2138_axes_0"), val = tensor([-1])]; + tensor blocks_19_attn_ln_weight_to_fp16 = const()[name = string("blocks_19_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488497408)))]; + tensor blocks_19_attn_ln_bias_to_fp16 = const()[name = string("blocks_19_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488499520)))]; + fp16 var_2128_to_fp16 = const()[name = string("op_2128_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2138_cast_fp16 = layer_norm(axes = var_2138_axes_0, beta = blocks_19_attn_ln_bias_to_fp16, epsilon = var_2128_to_fp16, gamma = blocks_19_attn_ln_weight_to_fp16, x = x_235_cast_fp16)[name = string("op_2138_cast_fp16")]; + tensor var_2149_to_fp16 = const()[name = string("op_2149_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488501632)))]; + tensor var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490598848)))]; + tensor linear_114_cast_fp16 = linear(bias = var_2150_to_fp16, weight = var_2149_to_fp16, x = var_2138_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor var_2153_to_fp16 = const()[name = string("op_2153_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490600960)))]; + tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2153_to_fp16, x = var_2138_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor var_2157_to_fp16 = const()[name = string("op_2157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492698176)))]; + tensor var_2158_to_fp16 = const()[name = string("op_2158_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494795392)))]; + tensor linear_116_cast_fp16 = linear(bias = var_2158_to_fp16, weight = var_2157_to_fp16, x = var_2138_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_2166 = const()[name = string("op_2166"), val = tensor([1, 1500, 16, -1])]; + tensor var_2167_cast_fp16 = reshape(shape = var_2166, x = linear_114_cast_fp16)[name = string("op_2167_cast_fp16")]; + tensor const_206_to_fp16 = const()[name = string("const_206_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2167_cast_fp16, y = const_206_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2173 = const()[name = string("op_2173"), val = tensor([1, 1500, 16, -1])]; + tensor var_2174_cast_fp16 = reshape(shape = var_2173, x = linear_115_cast_fp16)[name = string("op_2174_cast_fp16")]; + tensor const_207_to_fp16 = const()[name = string("const_207_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_2174_cast_fp16, y = const_207_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_2180 = const()[name = string("op_2180"), val = tensor([1, 1500, 16, -1])]; + tensor var_2181_cast_fp16 = reshape(shape = var_2180, x = linear_116_cast_fp16)[name = string("op_2181_cast_fp16")]; + tensor var_2182 = const()[name = string("op_2182"), val = tensor([0, 2, 1, 3])]; + bool qk_39_transpose_x_0 = const()[name = string("qk_39_transpose_x_0"), val = bool(false)]; + bool qk_39_transpose_y_0 = const()[name = string("qk_39_transpose_y_0"), val = bool(false)]; + tensor transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = k_79_cast_fp16)[name = string("transpose_161")]; + tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = q_79_cast_fp16)[name = string("transpose_162")]; + tensor qk_39_cast_fp16 = matmul(transpose_x = qk_39_transpose_x_0, transpose_y = qk_39_transpose_y_0, x = transpose_134, y = transpose_135)[name = string("qk_39_cast_fp16")]; + tensor var_2186_cast_fp16 = softmax(axis = var_2122, x = qk_39_cast_fp16)[name = string("op_2186_cast_fp16")]; + bool var_2188_transpose_x_0 = const()[name = string("op_2188_transpose_x_0"), val = bool(false)]; + bool var_2188_transpose_y_0 = const()[name = string("op_2188_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_2182, x = var_2181_cast_fp16)[name = string("transpose_163")]; + tensor var_2188_cast_fp16 = matmul(transpose_x = var_2188_transpose_x_0, transpose_y = var_2188_transpose_y_0, x = var_2186_cast_fp16, y = v_79_cast_fp16)[name = string("op_2188_cast_fp16")]; + tensor var_2189 = const()[name = string("op_2189"), val = tensor([0, 2, 1, 3])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([1, 1500, 1024])]; + tensor var_2190_cast_fp16 = transpose(perm = var_2189, x = var_2188_cast_fp16)[name = string("transpose_160")]; + tensor x_239_cast_fp16 = reshape(shape = concat_19, x = var_2190_cast_fp16)[name = string("x_239_cast_fp16")]; + tensor var_2194_to_fp16 = const()[name = string("op_2194_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(494797504)))]; + tensor var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496894720)))]; + tensor linear_117_cast_fp16 = linear(bias = var_2195_to_fp16, weight = var_2194_to_fp16, x = x_239_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor x_241_cast_fp16 = add(x = x_235_cast_fp16, y = linear_117_cast_fp16)[name = string("x_241_cast_fp16")]; + tensor var_2202_axes_0 = const()[name = string("op_2202_axes_0"), val = tensor([-1])]; + tensor blocks_19_mlp_ln_weight_to_fp16 = const()[name = string("blocks_19_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496896832)))]; + tensor blocks_19_mlp_ln_bias_to_fp16 = const()[name = string("blocks_19_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496898944)))]; + tensor var_2202_cast_fp16 = layer_norm(axes = var_2202_axes_0, beta = blocks_19_mlp_ln_bias_to_fp16, epsilon = var_2128_to_fp16, gamma = blocks_19_mlp_ln_weight_to_fp16, x = x_241_cast_fp16)[name = string("op_2202_cast_fp16")]; + tensor var_2211_to_fp16 = const()[name = string("op_2211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496901056)))]; + tensor var_2212_to_fp16 = const()[name = string("op_2212_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505289728)))]; + tensor linear_118_cast_fp16 = linear(bias = var_2212_to_fp16, weight = var_2211_to_fp16, x = var_2202_cast_fp16)[name = string("linear_118_cast_fp16")]; + string x_245_mode_0 = const()[name = string("x_245_mode_0"), val = string("EXACT")]; + tensor x_245_cast_fp16 = gelu(mode = x_245_mode_0, x = linear_118_cast_fp16)[name = string("x_245_cast_fp16")]; + tensor var_2217_to_fp16 = const()[name = string("op_2217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505297984)))]; + tensor var_2218_to_fp16 = const()[name = string("op_2218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513686656)))]; + tensor linear_119_cast_fp16 = linear(bias = var_2218_to_fp16, weight = var_2217_to_fp16, x = x_245_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor x_247_cast_fp16 = add(x = x_241_cast_fp16, y = linear_119_cast_fp16)[name = string("x_247_cast_fp16")]; + int32 var_2228 = const()[name = string("op_2228"), val = int32(-1)]; + tensor var_2244_axes_0 = const()[name = string("op_2244_axes_0"), val = tensor([-1])]; + tensor blocks_20_attn_ln_weight_to_fp16 = const()[name = string("blocks_20_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513688768)))]; + tensor blocks_20_attn_ln_bias_to_fp16 = const()[name = string("blocks_20_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513690880)))]; + fp16 var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2244_cast_fp16 = layer_norm(axes = var_2244_axes_0, beta = blocks_20_attn_ln_bias_to_fp16, epsilon = var_2234_to_fp16, gamma = blocks_20_attn_ln_weight_to_fp16, x = x_247_cast_fp16)[name = string("op_2244_cast_fp16")]; + tensor var_2255_to_fp16 = const()[name = string("op_2255_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513692992)))]; + tensor var_2256_to_fp16 = const()[name = string("op_2256_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515790208)))]; + tensor linear_120_cast_fp16 = linear(bias = var_2256_to_fp16, weight = var_2255_to_fp16, x = var_2244_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(515792320)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2259_to_fp16, x = var_2244_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor var_2263_to_fp16 = const()[name = string("op_2263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517889536)))]; + tensor var_2264_to_fp16 = const()[name = string("op_2264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519986752)))]; + tensor linear_122_cast_fp16 = linear(bias = var_2264_to_fp16, weight = var_2263_to_fp16, x = var_2244_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor var_2272 = const()[name = string("op_2272"), val = tensor([1, 1500, 16, -1])]; + tensor var_2273_cast_fp16 = reshape(shape = var_2272, x = linear_120_cast_fp16)[name = string("op_2273_cast_fp16")]; + tensor const_208_to_fp16 = const()[name = string("const_208_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2273_cast_fp16, y = const_208_to_fp16)[name = string("q_83_cast_fp16")]; + tensor var_2279 = const()[name = string("op_2279"), val = tensor([1, 1500, 16, -1])]; + tensor var_2280_cast_fp16 = reshape(shape = var_2279, x = linear_121_cast_fp16)[name = string("op_2280_cast_fp16")]; + tensor const_209_to_fp16 = const()[name = string("const_209_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_83_cast_fp16 = mul(x = var_2280_cast_fp16, y = const_209_to_fp16)[name = string("k_83_cast_fp16")]; + tensor var_2286 = const()[name = string("op_2286"), val = tensor([1, 1500, 16, -1])]; + tensor var_2287_cast_fp16 = reshape(shape = var_2286, x = linear_122_cast_fp16)[name = string("op_2287_cast_fp16")]; + tensor var_2288 = const()[name = string("op_2288"), val = tensor([0, 2, 1, 3])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = k_83_cast_fp16)[name = string("transpose_157")]; + tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = q_83_cast_fp16)[name = string("transpose_158")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_136, y = transpose_137)[name = string("qk_41_cast_fp16")]; + tensor var_2292_cast_fp16 = softmax(axis = var_2228, x = qk_41_cast_fp16)[name = string("op_2292_cast_fp16")]; + bool var_2294_transpose_x_0 = const()[name = string("op_2294_transpose_x_0"), val = bool(false)]; + bool var_2294_transpose_y_0 = const()[name = string("op_2294_transpose_y_0"), val = bool(false)]; + tensor v_83_cast_fp16 = transpose(perm = var_2288, x = var_2287_cast_fp16)[name = string("transpose_159")]; + tensor var_2294_cast_fp16 = matmul(transpose_x = var_2294_transpose_x_0, transpose_y = var_2294_transpose_y_0, x = var_2292_cast_fp16, y = v_83_cast_fp16)[name = string("op_2294_cast_fp16")]; + tensor var_2295 = const()[name = string("op_2295"), val = tensor([0, 2, 1, 3])]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([1, 1500, 1024])]; + tensor var_2296_cast_fp16 = transpose(perm = var_2295, x = var_2294_cast_fp16)[name = string("transpose_156")]; + tensor x_251_cast_fp16 = reshape(shape = concat_20, x = var_2296_cast_fp16)[name = string("x_251_cast_fp16")]; + tensor var_2300_to_fp16 = const()[name = string("op_2300_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(519988864)))]; + tensor var_2301_to_fp16 = const()[name = string("op_2301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522086080)))]; + tensor linear_123_cast_fp16 = linear(bias = var_2301_to_fp16, weight = var_2300_to_fp16, x = x_251_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor x_253_cast_fp16 = add(x = x_247_cast_fp16, y = linear_123_cast_fp16)[name = string("x_253_cast_fp16")]; + tensor var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor([-1])]; + tensor blocks_20_mlp_ln_weight_to_fp16 = const()[name = string("blocks_20_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522088192)))]; + tensor blocks_20_mlp_ln_bias_to_fp16 = const()[name = string("blocks_20_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522090304)))]; + tensor var_2308_cast_fp16 = layer_norm(axes = var_2308_axes_0, beta = blocks_20_mlp_ln_bias_to_fp16, epsilon = var_2234_to_fp16, gamma = blocks_20_mlp_ln_weight_to_fp16, x = x_253_cast_fp16)[name = string("op_2308_cast_fp16")]; + tensor var_2317_to_fp16 = const()[name = string("op_2317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(522092416)))]; + tensor var_2318_to_fp16 = const()[name = string("op_2318_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530481088)))]; + tensor linear_124_cast_fp16 = linear(bias = var_2318_to_fp16, weight = var_2317_to_fp16, x = var_2308_cast_fp16)[name = string("linear_124_cast_fp16")]; + string x_257_mode_0 = const()[name = string("x_257_mode_0"), val = string("EXACT")]; + tensor x_257_cast_fp16 = gelu(mode = x_257_mode_0, x = linear_124_cast_fp16)[name = string("x_257_cast_fp16")]; + tensor var_2323_to_fp16 = const()[name = string("op_2323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530489344)))]; + tensor var_2324_to_fp16 = const()[name = string("op_2324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538878016)))]; + tensor linear_125_cast_fp16 = linear(bias = var_2324_to_fp16, weight = var_2323_to_fp16, x = x_257_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_125_cast_fp16)[name = string("x_259_cast_fp16")]; + int32 var_2334 = const()[name = string("op_2334"), val = int32(-1)]; + tensor var_2350_axes_0 = const()[name = string("op_2350_axes_0"), val = tensor([-1])]; + tensor blocks_21_attn_ln_weight_to_fp16 = const()[name = string("blocks_21_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538880128)))]; + tensor blocks_21_attn_ln_bias_to_fp16 = const()[name = string("blocks_21_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538882240)))]; + fp16 var_2340_to_fp16 = const()[name = string("op_2340_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2350_cast_fp16 = layer_norm(axes = var_2350_axes_0, beta = blocks_21_attn_ln_bias_to_fp16, epsilon = var_2340_to_fp16, gamma = blocks_21_attn_ln_weight_to_fp16, x = x_259_cast_fp16)[name = string("op_2350_cast_fp16")]; + tensor var_2361_to_fp16 = const()[name = string("op_2361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538884352)))]; + tensor var_2362_to_fp16 = const()[name = string("op_2362_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540981568)))]; + tensor linear_126_cast_fp16 = linear(bias = var_2362_to_fp16, weight = var_2361_to_fp16, x = var_2350_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor var_2365_to_fp16 = const()[name = string("op_2365_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540983680)))]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2365_to_fp16, x = var_2350_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor var_2369_to_fp16 = const()[name = string("op_2369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543080896)))]; + tensor var_2370_to_fp16 = const()[name = string("op_2370_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545178112)))]; + tensor linear_128_cast_fp16 = linear(bias = var_2370_to_fp16, weight = var_2369_to_fp16, x = var_2350_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor var_2378 = const()[name = string("op_2378"), val = tensor([1, 1500, 16, -1])]; + tensor var_2379_cast_fp16 = reshape(shape = var_2378, x = linear_126_cast_fp16)[name = string("op_2379_cast_fp16")]; + tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2379_cast_fp16, y = const_210_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2385 = const()[name = string("op_2385"), val = tensor([1, 1500, 16, -1])]; + tensor var_2386_cast_fp16 = reshape(shape = var_2385, x = linear_127_cast_fp16)[name = string("op_2386_cast_fp16")]; + tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_87_cast_fp16 = mul(x = var_2386_cast_fp16, y = const_211_to_fp16)[name = string("k_87_cast_fp16")]; + tensor var_2392 = const()[name = string("op_2392"), val = tensor([1, 1500, 16, -1])]; + tensor var_2393_cast_fp16 = reshape(shape = var_2392, x = linear_128_cast_fp16)[name = string("op_2393_cast_fp16")]; + tensor var_2394 = const()[name = string("op_2394"), val = tensor([0, 2, 1, 3])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = k_87_cast_fp16)[name = string("transpose_153")]; + tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = q_87_cast_fp16)[name = string("transpose_154")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_138, y = transpose_139)[name = string("qk_43_cast_fp16")]; + tensor var_2398_cast_fp16 = softmax(axis = var_2334, x = qk_43_cast_fp16)[name = string("op_2398_cast_fp16")]; + bool var_2400_transpose_x_0 = const()[name = string("op_2400_transpose_x_0"), val = bool(false)]; + bool var_2400_transpose_y_0 = const()[name = string("op_2400_transpose_y_0"), val = bool(false)]; + tensor v_87_cast_fp16 = transpose(perm = var_2394, x = var_2393_cast_fp16)[name = string("transpose_155")]; + tensor var_2400_cast_fp16 = matmul(transpose_x = var_2400_transpose_x_0, transpose_y = var_2400_transpose_y_0, x = var_2398_cast_fp16, y = v_87_cast_fp16)[name = string("op_2400_cast_fp16")]; + tensor var_2401 = const()[name = string("op_2401"), val = tensor([0, 2, 1, 3])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([1, 1500, 1024])]; + tensor var_2402_cast_fp16 = transpose(perm = var_2401, x = var_2400_cast_fp16)[name = string("transpose_152")]; + tensor x_263_cast_fp16 = reshape(shape = concat_21, x = var_2402_cast_fp16)[name = string("x_263_cast_fp16")]; + tensor var_2406_to_fp16 = const()[name = string("op_2406_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(545180224)))]; + tensor var_2407_to_fp16 = const()[name = string("op_2407_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547277440)))]; + tensor linear_129_cast_fp16 = linear(bias = var_2407_to_fp16, weight = var_2406_to_fp16, x = x_263_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor x_265_cast_fp16 = add(x = x_259_cast_fp16, y = linear_129_cast_fp16)[name = string("x_265_cast_fp16")]; + tensor var_2414_axes_0 = const()[name = string("op_2414_axes_0"), val = tensor([-1])]; + tensor blocks_21_mlp_ln_weight_to_fp16 = const()[name = string("blocks_21_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547279552)))]; + tensor blocks_21_mlp_ln_bias_to_fp16 = const()[name = string("blocks_21_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547281664)))]; + tensor var_2414_cast_fp16 = layer_norm(axes = var_2414_axes_0, beta = blocks_21_mlp_ln_bias_to_fp16, epsilon = var_2340_to_fp16, gamma = blocks_21_mlp_ln_weight_to_fp16, x = x_265_cast_fp16)[name = string("op_2414_cast_fp16")]; + tensor var_2423_to_fp16 = const()[name = string("op_2423_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547283776)))]; + tensor var_2424_to_fp16 = const()[name = string("op_2424_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555672448)))]; + tensor linear_130_cast_fp16 = linear(bias = var_2424_to_fp16, weight = var_2423_to_fp16, x = var_2414_cast_fp16)[name = string("linear_130_cast_fp16")]; + string x_269_mode_0 = const()[name = string("x_269_mode_0"), val = string("EXACT")]; + tensor x_269_cast_fp16 = gelu(mode = x_269_mode_0, x = linear_130_cast_fp16)[name = string("x_269_cast_fp16")]; + tensor var_2429_to_fp16 = const()[name = string("op_2429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555680704)))]; + tensor var_2430_to_fp16 = const()[name = string("op_2430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564069376)))]; + tensor linear_131_cast_fp16 = linear(bias = var_2430_to_fp16, weight = var_2429_to_fp16, x = x_269_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor x_271_cast_fp16 = add(x = x_265_cast_fp16, y = linear_131_cast_fp16)[name = string("x_271_cast_fp16")]; + int32 var_2440 = const()[name = string("op_2440"), val = int32(-1)]; + tensor var_2456_axes_0 = const()[name = string("op_2456_axes_0"), val = tensor([-1])]; + tensor blocks_22_attn_ln_weight_to_fp16 = const()[name = string("blocks_22_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564071488)))]; + tensor blocks_22_attn_ln_bias_to_fp16 = const()[name = string("blocks_22_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564073600)))]; + fp16 var_2446_to_fp16 = const()[name = string("op_2446_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2456_cast_fp16 = layer_norm(axes = var_2456_axes_0, beta = blocks_22_attn_ln_bias_to_fp16, epsilon = var_2446_to_fp16, gamma = blocks_22_attn_ln_weight_to_fp16, x = x_271_cast_fp16)[name = string("op_2456_cast_fp16")]; + tensor var_2467_to_fp16 = const()[name = string("op_2467_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564075712)))]; + tensor var_2468_to_fp16 = const()[name = string("op_2468_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566172928)))]; + tensor linear_132_cast_fp16 = linear(bias = var_2468_to_fp16, weight = var_2467_to_fp16, x = var_2456_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor var_2471_to_fp16 = const()[name = string("op_2471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566175040)))]; + tensor linear_133_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2471_to_fp16, x = var_2456_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor var_2475_to_fp16 = const()[name = string("op_2475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(568272256)))]; + tensor var_2476_to_fp16 = const()[name = string("op_2476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570369472)))]; + tensor linear_134_cast_fp16 = linear(bias = var_2476_to_fp16, weight = var_2475_to_fp16, x = var_2456_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor var_2484 = const()[name = string("op_2484"), val = tensor([1, 1500, 16, -1])]; + tensor var_2485_cast_fp16 = reshape(shape = var_2484, x = linear_132_cast_fp16)[name = string("op_2485_cast_fp16")]; + tensor const_212_to_fp16 = const()[name = string("const_212_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2485_cast_fp16, y = const_212_to_fp16)[name = string("q_91_cast_fp16")]; + tensor var_2491 = const()[name = string("op_2491"), val = tensor([1, 1500, 16, -1])]; + tensor var_2492_cast_fp16 = reshape(shape = var_2491, x = linear_133_cast_fp16)[name = string("op_2492_cast_fp16")]; + tensor const_213_to_fp16 = const()[name = string("const_213_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_91_cast_fp16 = mul(x = var_2492_cast_fp16, y = const_213_to_fp16)[name = string("k_91_cast_fp16")]; + tensor var_2498 = const()[name = string("op_2498"), val = tensor([1, 1500, 16, -1])]; + tensor var_2499_cast_fp16 = reshape(shape = var_2498, x = linear_134_cast_fp16)[name = string("op_2499_cast_fp16")]; + tensor var_2500 = const()[name = string("op_2500"), val = tensor([0, 2, 1, 3])]; + bool qk_45_transpose_x_0 = const()[name = string("qk_45_transpose_x_0"), val = bool(false)]; + bool qk_45_transpose_y_0 = const()[name = string("qk_45_transpose_y_0"), val = bool(false)]; + tensor transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = k_91_cast_fp16)[name = string("transpose_149")]; + tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = q_91_cast_fp16)[name = string("transpose_150")]; + tensor qk_45_cast_fp16 = matmul(transpose_x = qk_45_transpose_x_0, transpose_y = qk_45_transpose_y_0, x = transpose_140, y = transpose_141)[name = string("qk_45_cast_fp16")]; + tensor var_2504_cast_fp16 = softmax(axis = var_2440, x = qk_45_cast_fp16)[name = string("op_2504_cast_fp16")]; + bool var_2506_transpose_x_0 = const()[name = string("op_2506_transpose_x_0"), val = bool(false)]; + bool var_2506_transpose_y_0 = const()[name = string("op_2506_transpose_y_0"), val = bool(false)]; + tensor v_91_cast_fp16 = transpose(perm = var_2500, x = var_2499_cast_fp16)[name = string("transpose_151")]; + tensor var_2506_cast_fp16 = matmul(transpose_x = var_2506_transpose_x_0, transpose_y = var_2506_transpose_y_0, x = var_2504_cast_fp16, y = v_91_cast_fp16)[name = string("op_2506_cast_fp16")]; + tensor var_2507 = const()[name = string("op_2507"), val = tensor([0, 2, 1, 3])]; + tensor concat_22 = const()[name = string("concat_22"), val = tensor([1, 1500, 1024])]; + tensor var_2508_cast_fp16 = transpose(perm = var_2507, x = var_2506_cast_fp16)[name = string("transpose_148")]; + tensor x_275_cast_fp16 = reshape(shape = concat_22, x = var_2508_cast_fp16)[name = string("x_275_cast_fp16")]; + tensor var_2512_to_fp16 = const()[name = string("op_2512_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570371584)))]; + tensor var_2513_to_fp16 = const()[name = string("op_2513_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572468800)))]; + tensor linear_135_cast_fp16 = linear(bias = var_2513_to_fp16, weight = var_2512_to_fp16, x = x_275_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor x_277_cast_fp16 = add(x = x_271_cast_fp16, y = linear_135_cast_fp16)[name = string("x_277_cast_fp16")]; + tensor var_2520_axes_0 = const()[name = string("op_2520_axes_0"), val = tensor([-1])]; + tensor blocks_22_mlp_ln_weight_to_fp16 = const()[name = string("blocks_22_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572470912)))]; + tensor blocks_22_mlp_ln_bias_to_fp16 = const()[name = string("blocks_22_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572473024)))]; + tensor var_2520_cast_fp16 = layer_norm(axes = var_2520_axes_0, beta = blocks_22_mlp_ln_bias_to_fp16, epsilon = var_2446_to_fp16, gamma = blocks_22_mlp_ln_weight_to_fp16, x = x_277_cast_fp16)[name = string("op_2520_cast_fp16")]; + tensor var_2529_to_fp16 = const()[name = string("op_2529_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572475136)))]; + tensor var_2530_to_fp16 = const()[name = string("op_2530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580863808)))]; + tensor linear_136_cast_fp16 = linear(bias = var_2530_to_fp16, weight = var_2529_to_fp16, x = var_2520_cast_fp16)[name = string("linear_136_cast_fp16")]; + string x_281_mode_0 = const()[name = string("x_281_mode_0"), val = string("EXACT")]; + tensor x_281_cast_fp16 = gelu(mode = x_281_mode_0, x = linear_136_cast_fp16)[name = string("x_281_cast_fp16")]; + tensor var_2535_to_fp16 = const()[name = string("op_2535_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(580872064)))]; + tensor var_2536_to_fp16 = const()[name = string("op_2536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589260736)))]; + tensor linear_137_cast_fp16 = linear(bias = var_2536_to_fp16, weight = var_2535_to_fp16, x = x_281_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor x_283_cast_fp16 = add(x = x_277_cast_fp16, y = linear_137_cast_fp16)[name = string("x_283_cast_fp16")]; + int32 var_2546 = const()[name = string("op_2546"), val = int32(-1)]; + tensor var_2562_axes_0 = const()[name = string("op_2562_axes_0"), val = tensor([-1])]; + tensor blocks_23_attn_ln_weight_to_fp16 = const()[name = string("blocks_23_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589262848)))]; + tensor blocks_23_attn_ln_bias_to_fp16 = const()[name = string("blocks_23_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589264960)))]; + fp16 var_2552_to_fp16 = const()[name = string("op_2552_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2562_cast_fp16 = layer_norm(axes = var_2562_axes_0, beta = blocks_23_attn_ln_bias_to_fp16, epsilon = var_2552_to_fp16, gamma = blocks_23_attn_ln_weight_to_fp16, x = x_283_cast_fp16)[name = string("op_2562_cast_fp16")]; + tensor var_2573_to_fp16 = const()[name = string("op_2573_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(589267072)))]; + tensor var_2574_to_fp16 = const()[name = string("op_2574_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591364288)))]; + tensor linear_138_cast_fp16 = linear(bias = var_2574_to_fp16, weight = var_2573_to_fp16, x = var_2562_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor var_2577_to_fp16 = const()[name = string("op_2577_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591366400)))]; + tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2577_to_fp16, x = var_2562_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor var_2581_to_fp16 = const()[name = string("op_2581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(593463616)))]; + tensor var_2582_to_fp16 = const()[name = string("op_2582_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595560832)))]; + tensor linear_140_cast_fp16 = linear(bias = var_2582_to_fp16, weight = var_2581_to_fp16, x = var_2562_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor var_2590 = const()[name = string("op_2590"), val = tensor([1, 1500, 16, -1])]; + tensor var_2591_cast_fp16 = reshape(shape = var_2590, x = linear_138_cast_fp16)[name = string("op_2591_cast_fp16")]; + tensor const_214_to_fp16 = const()[name = string("const_214_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_2591_cast_fp16, y = const_214_to_fp16)[name = string("q_cast_fp16")]; + tensor var_2597 = const()[name = string("op_2597"), val = tensor([1, 1500, 16, -1])]; + tensor var_2598_cast_fp16 = reshape(shape = var_2597, x = linear_139_cast_fp16)[name = string("op_2598_cast_fp16")]; + tensor const_215_to_fp16 = const()[name = string("const_215_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_2598_cast_fp16, y = const_215_to_fp16)[name = string("k_cast_fp16")]; + tensor var_2604 = const()[name = string("op_2604"), val = tensor([1, 1500, 16, -1])]; + tensor var_2605_cast_fp16 = reshape(shape = var_2604, x = linear_140_cast_fp16)[name = string("op_2605_cast_fp16")]; + tensor var_2606 = const()[name = string("op_2606"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = k_cast_fp16)[name = string("transpose_145")]; + tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = q_cast_fp16)[name = string("transpose_146")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_142, y = transpose_143)[name = string("qk_cast_fp16")]; + tensor var_2610_cast_fp16 = softmax(axis = var_2546, x = qk_cast_fp16)[name = string("op_2610_cast_fp16")]; + bool var_2612_transpose_x_0 = const()[name = string("op_2612_transpose_x_0"), val = bool(false)]; + bool var_2612_transpose_y_0 = const()[name = string("op_2612_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_2606, x = var_2605_cast_fp16)[name = string("transpose_147")]; + tensor var_2612_cast_fp16 = matmul(transpose_x = var_2612_transpose_x_0, transpose_y = var_2612_transpose_y_0, x = var_2610_cast_fp16, y = v_cast_fp16)[name = string("op_2612_cast_fp16")]; + tensor var_2613 = const()[name = string("op_2613"), val = tensor([0, 2, 1, 3])]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([1, 1500, 1024])]; + tensor var_2614_cast_fp16 = transpose(perm = var_2613, x = var_2612_cast_fp16)[name = string("transpose_144")]; + tensor x_287_cast_fp16 = reshape(shape = concat_23, x = var_2614_cast_fp16)[name = string("x_287_cast_fp16")]; + tensor var_2618_to_fp16 = const()[name = string("op_2618_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595562944)))]; + tensor var_2619_to_fp16 = const()[name = string("op_2619_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597660160)))]; + tensor linear_141_cast_fp16 = linear(bias = var_2619_to_fp16, weight = var_2618_to_fp16, x = x_287_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor x_289_cast_fp16 = add(x = x_283_cast_fp16, y = linear_141_cast_fp16)[name = string("x_289_cast_fp16")]; + tensor var_2626_axes_0 = const()[name = string("op_2626_axes_0"), val = tensor([-1])]; + tensor blocks_23_mlp_ln_weight_to_fp16 = const()[name = string("blocks_23_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597662272)))]; + tensor blocks_23_mlp_ln_bias_to_fp16 = const()[name = string("blocks_23_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597664384)))]; + tensor var_2626_cast_fp16 = layer_norm(axes = var_2626_axes_0, beta = blocks_23_mlp_ln_bias_to_fp16, epsilon = var_2552_to_fp16, gamma = blocks_23_mlp_ln_weight_to_fp16, x = x_289_cast_fp16)[name = string("op_2626_cast_fp16")]; + tensor var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(597666496)))]; + tensor var_2636_to_fp16 = const()[name = string("op_2636_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606055168)))]; + tensor linear_142_cast_fp16 = linear(bias = var_2636_to_fp16, weight = var_2635_to_fp16, x = var_2626_cast_fp16)[name = string("linear_142_cast_fp16")]; + string x_293_mode_0 = const()[name = string("x_293_mode_0"), val = string("EXACT")]; + tensor x_293_cast_fp16 = gelu(mode = x_293_mode_0, x = linear_142_cast_fp16)[name = string("x_293_cast_fp16")]; + tensor var_2641_to_fp16 = const()[name = string("op_2641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606063424)))]; + tensor var_2642_to_fp16 = const()[name = string("op_2642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614452096)))]; + tensor linear_143_cast_fp16 = linear(bias = var_2642_to_fp16, weight = var_2641_to_fp16, x = x_293_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_289_cast_fp16, y = linear_143_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_2655_axes_0 = const()[name = string("op_2655_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614454208)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(614456320)))]; + fp16 var_2646_to_fp16 = const()[name = string("op_2646_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_2655_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_2646_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_2655_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/medium/encoder.mlmodelc/weights/weight.bin b/medium/encoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..1866ade87664eb3f1788b808651fba6831f3740c --- /dev/null +++ b/medium/encoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd1baef4c7d8260ea817ea56705b3700155c01e8d3ea4bc8e364a8674a88d15 +size 614458432 diff --git a/medium/model_dims.json b/medium/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..477e24aaa9c13c6726a8df61f16bd82f1405be55 --- /dev/null +++ b/medium/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 80, + "n_audio_ctx": 1500, + "n_audio_state": 1024, + "n_audio_head": 16, + "n_audio_layer": 24, + "n_vocab": 51865, + "n_text_ctx": 448, + "n_text_state": 1024, + "n_text_head": 16, + "n_text_layer": 24 +} \ No newline at end of file diff --git a/small/decoder_first.mlmodelc/analytics/coremldata.bin b/small/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..0032999b4489c4f9d5bd9515f717cc6e1c9fe736 --- /dev/null +++ b/small/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d6c44560c36d629943c0c46fda6c8a900954a25d081de6ca16e2e45d48cd +size 243 diff --git a/small/decoder_first.mlmodelc/coremldata.bin b/small/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..80bf6c99e245a2440709d8dbd5a28cb5341b2e42 --- /dev/null +++ b/small/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a62f546da9814e8ccdc00a5ab41a7f8198d82970eb5fedb956aa58759ef3609 +size 453 diff --git a/small/decoder_first.mlmodelc/metadata.json b/small/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..999dc10290ff4c233fecaf090537792a279d768f --- /dev/null +++ b/small/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 26, + "Shape" : 24, + "Ios18.linear" : 24, + "Identity" : 1, + "Ios18.gather" : 24, + "Ios18.concat" : 24, + "Ios18.sliceUpdate" : 26, + "Ios18.cast" : 48, + "Ios18.expandDims" : 24, + "Ios18.readState" : 26 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 448 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 448, 768]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 448 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 448, 768]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 1500 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 1500, 768]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 1500 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 1500, 768]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 768", + "shapeRange" : "[[1, 1], [1, 1500], [768, 768]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 768)", + "type" : "MultiArray", + "shape" : "[1, 1, 768]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/small/decoder_first.mlmodelc/model.mil b/small/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..a28886a25fef705861bad1d346a593543141b415 --- /dev/null +++ b/small/decoder_first.mlmodelc/model.mil @@ -0,0 +1,711 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 768]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [768, 768]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_26_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_27_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_91_to_fp16 = const()[name = string("op_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8257664)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437376)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_91_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9438976)))]; + tensor var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10618688)))]; + tensor linear_1_cast_fp16 = linear(bias = var_96_to_fp16, weight = var_95_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_98_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_98_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_98_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_98_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_98_shape_cast_fp16_to_int16 = cast(dtype = var_98_shape_cast_fp16_to_int16_dtype_0, x = var_98_shape_cast_fp16)[name = string("cast_79")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_98_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_78")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = k_cache2)[name = string("coreml_update_state_28")]; + tensor var_103_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_103_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_103_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_103_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_103_shape_cast_fp16_to_uint16 = cast(dtype = var_103_shape_cast_fp16_to_uint16_dtype_0, x = var_103_shape_cast_fp16)[name = string("cast_77")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_103_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_76")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = v_cache2)[name = string("coreml_update_state_29")]; + tensor var_125_to_fp16 = const()[name = string("op_125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10620288)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_125_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11800000)))]; + tensor var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12979712)))]; + tensor linear_3_cast_fp16 = linear(bias = var_130_to_fp16, weight = var_129_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_132_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_132_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_132_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_132_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_132_shape_cast_fp16_to_uint16 = cast(dtype = var_132_shape_cast_fp16_to_uint16_dtype_0, x = var_132_shape_cast_fp16)[name = string("cast_75")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_132_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_74")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_28)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = k_cache2)[name = string("coreml_update_state_30")]; + tensor var_137_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_137_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_137_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_137_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_137_shape_cast_fp16_to_uint16 = cast(dtype = var_137_shape_cast_fp16_to_uint16_dtype_0, x = var_137_shape_cast_fp16)[name = string("cast_73")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_137_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_72")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_29)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = v_cache2)[name = string("coreml_update_state_31")]; + tensor var_159_to_fp16 = const()[name = string("op_159_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12981312)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_159_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_163_to_fp16 = const()[name = string("op_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14161024)))]; + tensor var_164_to_fp16 = const()[name = string("op_164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15340736)))]; + tensor linear_5_cast_fp16 = linear(bias = var_164_to_fp16, weight = var_163_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_166_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_166_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_166_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_166_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_166_shape_cast_fp16_to_uint16 = cast(dtype = var_166_shape_cast_fp16_to_uint16_dtype_0, x = var_166_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_166_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_70")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_30)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = k_cache2)[name = string("coreml_update_state_32")]; + tensor var_171_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_171_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_171_shape_cast_fp16_to_uint16 = cast(dtype = var_171_shape_cast_fp16_to_uint16_dtype_0, x = var_171_shape_cast_fp16)[name = string("cast_69")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_171_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_68")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_31)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = v_cache2)[name = string("coreml_update_state_33")]; + tensor var_193_to_fp16 = const()[name = string("op_193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15342336)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_193_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_197_to_fp16 = const()[name = string("op_197_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16522048)))]; + tensor var_198_to_fp16 = const()[name = string("op_198_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17701760)))]; + tensor linear_7_cast_fp16 = linear(bias = var_198_to_fp16, weight = var_197_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_200_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_200_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_200_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_200_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_200_shape_cast_fp16_to_uint16 = cast(dtype = var_200_shape_cast_fp16_to_uint16_dtype_0, x = var_200_shape_cast_fp16)[name = string("cast_67")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_200_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_66")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_32)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = k_cache2)[name = string("coreml_update_state_34")]; + tensor var_205_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_205_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_205_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_205_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_205_shape_cast_fp16_to_uint16 = cast(dtype = var_205_shape_cast_fp16_to_uint16_dtype_0, x = var_205_shape_cast_fp16)[name = string("cast_65")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_205_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_64")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_33)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = v_cache2)[name = string("coreml_update_state_35")]; + tensor var_227_to_fp16 = const()[name = string("op_227_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17703360)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_227_to_fp16, x = audio_data)[name = string("linear_8_cast_fp16")]; + tensor var_231_to_fp16 = const()[name = string("op_231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18883072)))]; + tensor var_232_to_fp16 = const()[name = string("op_232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20062784)))]; + tensor linear_9_cast_fp16 = linear(bias = var_232_to_fp16, weight = var_231_to_fp16, x = audio_data)[name = string("linear_9_cast_fp16")]; + tensor var_234_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_234_shape_cast_fp16")]; + int32 gather_8_axis_0 = const()[name = string("gather_8_axis_0"), val = int32(0)]; + int32 gather_8_batch_dims_0 = const()[name = string("gather_8_batch_dims_0"), val = int32(0)]; + bool gather_8_validate_indices_0 = const()[name = string("gather_8_validate_indices_0"), val = bool(false)]; + string var_234_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_234_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_8_to_uint16 = const()[name = string("select_8_to_uint16"), val = uint16(1)]; + tensor var_234_shape_cast_fp16_to_uint16 = cast(dtype = var_234_shape_cast_fp16_to_uint16_dtype_0, x = var_234_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_8_cast_uint16 = gather(axis = gather_8_axis_0, batch_dims = gather_8_batch_dims_0, indices = select_8_to_uint16, validate_indices = gather_8_validate_indices_0, x = var_234_shape_cast_fp16_to_uint16)[name = string("gather_8_cast_uint16")]; + string gather_8_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_8_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_43_axes_0 = const()[name = string("expand_dims_43_axes_0"), val = tensor([0])]; + int32 gather_8_cast_uint16_to_int32 = cast(dtype = gather_8_cast_uint16_to_int32_dtype_0, x = gather_8_cast_uint16)[name = string("cast_62")]; + tensor expand_dims_43 = expand_dims(axes = expand_dims_43_axes_0, x = gather_8_cast_uint16_to_int32)[name = string("expand_dims_43")]; + tensor concat_29 = const()[name = string("concat_29"), val = tensor([4, 0, 0, 0])]; + tensor concat_30_values0_0 = const()[name = string("concat_30_values0_0"), val = tensor([0])]; + tensor concat_30_values1_0 = const()[name = string("concat_30_values1_0"), val = tensor([0])]; + tensor concat_30_values3_0 = const()[name = string("concat_30_values3_0"), val = tensor([0])]; + int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)]; + bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)]; + tensor concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (concat_30_values0_0, concat_30_values1_0, expand_dims_43, concat_30_values3_0))[name = string("concat_30")]; + tensor k_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_29, begin_mask = k_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_30, end_mask = k_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_5_stride_0, update = linear_8_cast_fp16, x = coreml_update_state_34)[name = string("k_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_5_cast_fp16, input = k_cache2)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_36 = read_state(input = k_cache2)[name = string("coreml_update_state_36")]; + tensor var_239_shape_cast_fp16 = shape(x = linear_9_cast_fp16)[name = string("op_239_shape_cast_fp16")]; + int32 gather_9_axis_0 = const()[name = string("gather_9_axis_0"), val = int32(0)]; + int32 gather_9_batch_dims_0 = const()[name = string("gather_9_batch_dims_0"), val = int32(0)]; + bool gather_9_validate_indices_0 = const()[name = string("gather_9_validate_indices_0"), val = bool(false)]; + string var_239_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_239_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_9_to_uint16 = const()[name = string("select_9_to_uint16"), val = uint16(1)]; + tensor var_239_shape_cast_fp16_to_uint16 = cast(dtype = var_239_shape_cast_fp16_to_uint16_dtype_0, x = var_239_shape_cast_fp16)[name = string("cast_61")]; + uint16 gather_9_cast_uint16 = gather(axis = gather_9_axis_0, batch_dims = gather_9_batch_dims_0, indices = select_9_to_uint16, validate_indices = gather_9_validate_indices_0, x = var_239_shape_cast_fp16_to_uint16)[name = string("gather_9_cast_uint16")]; + string gather_9_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_9_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_47_axes_0 = const()[name = string("expand_dims_47_axes_0"), val = tensor([0])]; + int32 gather_9_cast_uint16_to_int32 = cast(dtype = gather_9_cast_uint16_to_int32_dtype_0, x = gather_9_cast_uint16)[name = string("cast_60")]; + tensor expand_dims_47 = expand_dims(axes = expand_dims_47_axes_0, x = gather_9_cast_uint16_to_int32)[name = string("expand_dims_47")]; + tensor concat_32 = const()[name = string("concat_32"), val = tensor([4, 0, 0, 0])]; + tensor concat_33_values0_0 = const()[name = string("concat_33_values0_0"), val = tensor([0])]; + tensor concat_33_values1_0 = const()[name = string("concat_33_values1_0"), val = tensor([0])]; + tensor concat_33_values3_0 = const()[name = string("concat_33_values3_0"), val = tensor([0])]; + int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; + bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; + tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (concat_33_values0_0, concat_33_values1_0, expand_dims_47, concat_33_values3_0))[name = string("concat_33")]; + tensor v_cache2_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_32, begin_mask = v_cache2_internal_tensor_assign_5_begin_mask_0, end = concat_33, end_mask = v_cache2_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_5_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_35)[name = string("v_cache2_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_5_cast_fp16, input = v_cache2)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_37 = read_state(input = v_cache2)[name = string("coreml_update_state_37")]; + tensor var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20064384)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_261_to_fp16, x = audio_data)[name = string("linear_10_cast_fp16")]; + tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21244096)))]; + tensor var_266_to_fp16 = const()[name = string("op_266_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22423808)))]; + tensor linear_11_cast_fp16 = linear(bias = var_266_to_fp16, weight = var_265_to_fp16, x = audio_data)[name = string("linear_11_cast_fp16")]; + tensor var_268_shape_cast_fp16 = shape(x = linear_10_cast_fp16)[name = string("op_268_shape_cast_fp16")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_268_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_268_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(1)]; + tensor var_268_shape_cast_fp16_to_uint16 = cast(dtype = var_268_shape_cast_fp16_to_uint16_dtype_0, x = var_268_shape_cast_fp16)[name = string("cast_59")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_268_shape_cast_fp16_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_58")]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = gather_10_cast_uint16_to_int32)[name = string("expand_dims_51")]; + tensor concat_35 = const()[name = string("concat_35"), val = tensor([5, 0, 0, 0])]; + tensor concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = tensor([0])]; + tensor concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = tensor([0])]; + tensor concat_36_values3_0 = const()[name = string("concat_36_values3_0"), val = tensor([0])]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, expand_dims_51, concat_36_values3_0))[name = string("concat_36")]; + tensor k_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_35, begin_mask = k_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_36, end_mask = k_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_6_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_36)[name = string("k_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_6_cast_fp16, input = k_cache2)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_38 = read_state(input = k_cache2)[name = string("coreml_update_state_38")]; + tensor var_273_shape_cast_fp16 = shape(x = linear_11_cast_fp16)[name = string("op_273_shape_cast_fp16")]; + int32 gather_11_axis_0 = const()[name = string("gather_11_axis_0"), val = int32(0)]; + int32 gather_11_batch_dims_0 = const()[name = string("gather_11_batch_dims_0"), val = int32(0)]; + bool gather_11_validate_indices_0 = const()[name = string("gather_11_validate_indices_0"), val = bool(false)]; + string var_273_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_273_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_11_to_uint16 = const()[name = string("select_11_to_uint16"), val = uint16(1)]; + tensor var_273_shape_cast_fp16_to_uint16 = cast(dtype = var_273_shape_cast_fp16_to_uint16_dtype_0, x = var_273_shape_cast_fp16)[name = string("cast_57")]; + uint16 gather_11_cast_uint16 = gather(axis = gather_11_axis_0, batch_dims = gather_11_batch_dims_0, indices = select_11_to_uint16, validate_indices = gather_11_validate_indices_0, x = var_273_shape_cast_fp16_to_uint16)[name = string("gather_11_cast_uint16")]; + string gather_11_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_11_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_55_axes_0 = const()[name = string("expand_dims_55_axes_0"), val = tensor([0])]; + int32 gather_11_cast_uint16_to_int32 = cast(dtype = gather_11_cast_uint16_to_int32_dtype_0, x = gather_11_cast_uint16)[name = string("cast_56")]; + tensor expand_dims_55 = expand_dims(axes = expand_dims_55_axes_0, x = gather_11_cast_uint16_to_int32)[name = string("expand_dims_55")]; + tensor concat_38 = const()[name = string("concat_38"), val = tensor([5, 0, 0, 0])]; + tensor concat_39_values0_0 = const()[name = string("concat_39_values0_0"), val = tensor([0])]; + tensor concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor([0])]; + tensor concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor([0])]; + int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)]; + bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)]; + tensor concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (concat_39_values0_0, concat_39_values1_0, expand_dims_55, concat_39_values3_0))[name = string("concat_39")]; + tensor v_cache2_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_38, begin_mask = v_cache2_internal_tensor_assign_6_begin_mask_0, end = concat_39, end_mask = v_cache2_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_6_stride_0, update = linear_11_cast_fp16, x = coreml_update_state_37)[name = string("v_cache2_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_6_cast_fp16, input = v_cache2)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_39 = read_state(input = v_cache2)[name = string("coreml_update_state_39")]; + tensor var_295_to_fp16 = const()[name = string("op_295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22425408)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_295_to_fp16, x = audio_data)[name = string("linear_12_cast_fp16")]; + tensor var_299_to_fp16 = const()[name = string("op_299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23605120)))]; + tensor var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24784832)))]; + tensor linear_13_cast_fp16 = linear(bias = var_300_to_fp16, weight = var_299_to_fp16, x = audio_data)[name = string("linear_13_cast_fp16")]; + tensor var_302_shape_cast_fp16 = shape(x = linear_12_cast_fp16)[name = string("op_302_shape_cast_fp16")]; + int32 gather_12_axis_0 = const()[name = string("gather_12_axis_0"), val = int32(0)]; + int32 gather_12_batch_dims_0 = const()[name = string("gather_12_batch_dims_0"), val = int32(0)]; + bool gather_12_validate_indices_0 = const()[name = string("gather_12_validate_indices_0"), val = bool(false)]; + string var_302_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_302_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_12_to_uint16 = const()[name = string("select_12_to_uint16"), val = uint16(1)]; + tensor var_302_shape_cast_fp16_to_uint16 = cast(dtype = var_302_shape_cast_fp16_to_uint16_dtype_0, x = var_302_shape_cast_fp16)[name = string("cast_55")]; + uint16 gather_12_cast_uint16 = gather(axis = gather_12_axis_0, batch_dims = gather_12_batch_dims_0, indices = select_12_to_uint16, validate_indices = gather_12_validate_indices_0, x = var_302_shape_cast_fp16_to_uint16)[name = string("gather_12_cast_uint16")]; + string gather_12_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_12_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_59_axes_0 = const()[name = string("expand_dims_59_axes_0"), val = tensor([0])]; + int32 gather_12_cast_uint16_to_int32 = cast(dtype = gather_12_cast_uint16_to_int32_dtype_0, x = gather_12_cast_uint16)[name = string("cast_54")]; + tensor expand_dims_59 = expand_dims(axes = expand_dims_59_axes_0, x = gather_12_cast_uint16_to_int32)[name = string("expand_dims_59")]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([6, 0, 0, 0])]; + tensor concat_42_values0_0 = const()[name = string("concat_42_values0_0"), val = tensor([0])]; + tensor concat_42_values1_0 = const()[name = string("concat_42_values1_0"), val = tensor([0])]; + tensor concat_42_values3_0 = const()[name = string("concat_42_values3_0"), val = tensor([0])]; + int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)]; + bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)]; + tensor concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (concat_42_values0_0, concat_42_values1_0, expand_dims_59, concat_42_values3_0))[name = string("concat_42")]; + tensor k_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_41, begin_mask = k_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_42, end_mask = k_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_7_stride_0, update = linear_12_cast_fp16, x = coreml_update_state_38)[name = string("k_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_7_cast_fp16, input = k_cache2)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_40 = read_state(input = k_cache2)[name = string("coreml_update_state_40")]; + tensor var_307_shape_cast_fp16 = shape(x = linear_13_cast_fp16)[name = string("op_307_shape_cast_fp16")]; + int32 gather_13_axis_0 = const()[name = string("gather_13_axis_0"), val = int32(0)]; + int32 gather_13_batch_dims_0 = const()[name = string("gather_13_batch_dims_0"), val = int32(0)]; + bool gather_13_validate_indices_0 = const()[name = string("gather_13_validate_indices_0"), val = bool(false)]; + string var_307_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_307_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_13_to_uint16 = const()[name = string("select_13_to_uint16"), val = uint16(1)]; + tensor var_307_shape_cast_fp16_to_uint16 = cast(dtype = var_307_shape_cast_fp16_to_uint16_dtype_0, x = var_307_shape_cast_fp16)[name = string("cast_53")]; + uint16 gather_13_cast_uint16 = gather(axis = gather_13_axis_0, batch_dims = gather_13_batch_dims_0, indices = select_13_to_uint16, validate_indices = gather_13_validate_indices_0, x = var_307_shape_cast_fp16_to_uint16)[name = string("gather_13_cast_uint16")]; + string gather_13_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_13_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_63_axes_0 = const()[name = string("expand_dims_63_axes_0"), val = tensor([0])]; + int32 gather_13_cast_uint16_to_int32 = cast(dtype = gather_13_cast_uint16_to_int32_dtype_0, x = gather_13_cast_uint16)[name = string("cast_52")]; + tensor expand_dims_63 = expand_dims(axes = expand_dims_63_axes_0, x = gather_13_cast_uint16_to_int32)[name = string("expand_dims_63")]; + tensor concat_44 = const()[name = string("concat_44"), val = tensor([6, 0, 0, 0])]; + tensor concat_45_values0_0 = const()[name = string("concat_45_values0_0"), val = tensor([0])]; + tensor concat_45_values1_0 = const()[name = string("concat_45_values1_0"), val = tensor([0])]; + tensor concat_45_values3_0 = const()[name = string("concat_45_values3_0"), val = tensor([0])]; + int32 concat_45_axis_0 = const()[name = string("concat_45_axis_0"), val = int32(0)]; + bool concat_45_interleave_0 = const()[name = string("concat_45_interleave_0"), val = bool(false)]; + tensor concat_45 = concat(axis = concat_45_axis_0, interleave = concat_45_interleave_0, values = (concat_45_values0_0, concat_45_values1_0, expand_dims_63, concat_45_values3_0))[name = string("concat_45")]; + tensor v_cache2_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_44, begin_mask = v_cache2_internal_tensor_assign_7_begin_mask_0, end = concat_45, end_mask = v_cache2_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_7_stride_0, update = linear_13_cast_fp16, x = coreml_update_state_39)[name = string("v_cache2_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_7_cast_fp16, input = v_cache2)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_41 = read_state(input = v_cache2)[name = string("coreml_update_state_41")]; + tensor var_329_to_fp16 = const()[name = string("op_329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24786432)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_329_to_fp16, x = audio_data)[name = string("linear_14_cast_fp16")]; + tensor var_333_to_fp16 = const()[name = string("op_333_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25966144)))]; + tensor var_334_to_fp16 = const()[name = string("op_334_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27145856)))]; + tensor linear_15_cast_fp16 = linear(bias = var_334_to_fp16, weight = var_333_to_fp16, x = audio_data)[name = string("linear_15_cast_fp16")]; + tensor var_336_shape_cast_fp16 = shape(x = linear_14_cast_fp16)[name = string("op_336_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_336_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_336_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_336_shape_cast_fp16_to_uint16 = cast(dtype = var_336_shape_cast_fp16_to_uint16_dtype_0, x = var_336_shape_cast_fp16)[name = string("cast_51")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_336_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_50")]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = gather_14_cast_uint16_to_int32)[name = string("expand_dims_67")]; + tensor concat_47 = const()[name = string("concat_47"), val = tensor([7, 0, 0, 0])]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([0])]; + tensor concat_48_values1_0 = const()[name = string("concat_48_values1_0"), val = tensor([0])]; + tensor concat_48_values3_0 = const()[name = string("concat_48_values3_0"), val = tensor([0])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, concat_48_values1_0, expand_dims_67, concat_48_values3_0))[name = string("concat_48")]; + tensor k_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_47, begin_mask = k_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_48, end_mask = k_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_8_stride_0, update = linear_14_cast_fp16, x = coreml_update_state_40)[name = string("k_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_8_cast_fp16, input = k_cache2)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_42 = read_state(input = k_cache2)[name = string("coreml_update_state_42")]; + tensor var_341_shape_cast_fp16 = shape(x = linear_15_cast_fp16)[name = string("op_341_shape_cast_fp16")]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_341_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_341_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(1)]; + tensor var_341_shape_cast_fp16_to_uint16 = cast(dtype = var_341_shape_cast_fp16_to_uint16_dtype_0, x = var_341_shape_cast_fp16)[name = string("cast_49")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_341_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_71_axes_0 = const()[name = string("expand_dims_71_axes_0"), val = tensor([0])]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_48")]; + tensor expand_dims_71 = expand_dims(axes = expand_dims_71_axes_0, x = gather_15_cast_uint16_to_int32)[name = string("expand_dims_71")]; + tensor concat_50 = const()[name = string("concat_50"), val = tensor([7, 0, 0, 0])]; + tensor concat_51_values0_0 = const()[name = string("concat_51_values0_0"), val = tensor([0])]; + tensor concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor([0])]; + tensor concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor([0])]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (concat_51_values0_0, concat_51_values1_0, expand_dims_71, concat_51_values3_0))[name = string("concat_51")]; + tensor v_cache2_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_50, begin_mask = v_cache2_internal_tensor_assign_8_begin_mask_0, end = concat_51, end_mask = v_cache2_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_8_stride_0, update = linear_15_cast_fp16, x = coreml_update_state_41)[name = string("v_cache2_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_8_cast_fp16, input = v_cache2)[name = string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_43 = read_state(input = v_cache2)[name = string("coreml_update_state_43")]; + tensor var_363_to_fp16 = const()[name = string("op_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27147456)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_363_to_fp16, x = audio_data)[name = string("linear_16_cast_fp16")]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28327168)))]; + tensor var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29506880)))]; + tensor linear_17_cast_fp16 = linear(bias = var_368_to_fp16, weight = var_367_to_fp16, x = audio_data)[name = string("linear_17_cast_fp16")]; + tensor var_370_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_370_shape_cast_fp16")]; + int32 gather_16_axis_0 = const()[name = string("gather_16_axis_0"), val = int32(0)]; + int32 gather_16_batch_dims_0 = const()[name = string("gather_16_batch_dims_0"), val = int32(0)]; + bool gather_16_validate_indices_0 = const()[name = string("gather_16_validate_indices_0"), val = bool(false)]; + string var_370_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_370_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_16_to_uint16 = const()[name = string("select_16_to_uint16"), val = uint16(1)]; + tensor var_370_shape_cast_fp16_to_uint16 = cast(dtype = var_370_shape_cast_fp16_to_uint16_dtype_0, x = var_370_shape_cast_fp16)[name = string("cast_47")]; + uint16 gather_16_cast_uint16 = gather(axis = gather_16_axis_0, batch_dims = gather_16_batch_dims_0, indices = select_16_to_uint16, validate_indices = gather_16_validate_indices_0, x = var_370_shape_cast_fp16_to_uint16)[name = string("gather_16_cast_uint16")]; + string gather_16_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_16_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_75_axes_0 = const()[name = string("expand_dims_75_axes_0"), val = tensor([0])]; + int32 gather_16_cast_uint16_to_int32 = cast(dtype = gather_16_cast_uint16_to_int32_dtype_0, x = gather_16_cast_uint16)[name = string("cast_46")]; + tensor expand_dims_75 = expand_dims(axes = expand_dims_75_axes_0, x = gather_16_cast_uint16_to_int32)[name = string("expand_dims_75")]; + tensor concat_53 = const()[name = string("concat_53"), val = tensor([8, 0, 0, 0])]; + tensor concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = tensor([0])]; + tensor concat_54_values1_0 = const()[name = string("concat_54_values1_0"), val = tensor([0])]; + tensor concat_54_values3_0 = const()[name = string("concat_54_values3_0"), val = tensor([0])]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, concat_54_values1_0, expand_dims_75, concat_54_values3_0))[name = string("concat_54")]; + tensor k_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_53, begin_mask = k_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_54, end_mask = k_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_9_stride_0, update = linear_16_cast_fp16, x = coreml_update_state_42)[name = string("k_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_9_cast_fp16, input = k_cache2)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_44 = read_state(input = k_cache2)[name = string("coreml_update_state_44")]; + tensor var_375_shape_cast_fp16 = shape(x = linear_17_cast_fp16)[name = string("op_375_shape_cast_fp16")]; + int32 gather_17_axis_0 = const()[name = string("gather_17_axis_0"), val = int32(0)]; + int32 gather_17_batch_dims_0 = const()[name = string("gather_17_batch_dims_0"), val = int32(0)]; + bool gather_17_validate_indices_0 = const()[name = string("gather_17_validate_indices_0"), val = bool(false)]; + string var_375_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_375_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_17_to_uint16 = const()[name = string("select_17_to_uint16"), val = uint16(1)]; + tensor var_375_shape_cast_fp16_to_uint16 = cast(dtype = var_375_shape_cast_fp16_to_uint16_dtype_0, x = var_375_shape_cast_fp16)[name = string("cast_45")]; + uint16 gather_17_cast_uint16 = gather(axis = gather_17_axis_0, batch_dims = gather_17_batch_dims_0, indices = select_17_to_uint16, validate_indices = gather_17_validate_indices_0, x = var_375_shape_cast_fp16_to_uint16)[name = string("gather_17_cast_uint16")]; + string gather_17_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_17_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_79_axes_0 = const()[name = string("expand_dims_79_axes_0"), val = tensor([0])]; + int32 gather_17_cast_uint16_to_int32 = cast(dtype = gather_17_cast_uint16_to_int32_dtype_0, x = gather_17_cast_uint16)[name = string("cast_44")]; + tensor expand_dims_79 = expand_dims(axes = expand_dims_79_axes_0, x = gather_17_cast_uint16_to_int32)[name = string("expand_dims_79")]; + tensor concat_56 = const()[name = string("concat_56"), val = tensor([8, 0, 0, 0])]; + tensor concat_57_values0_0 = const()[name = string("concat_57_values0_0"), val = tensor([0])]; + tensor concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor([0])]; + tensor concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor([0])]; + int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; + bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; + tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (concat_57_values0_0, concat_57_values1_0, expand_dims_79, concat_57_values3_0))[name = string("concat_57")]; + tensor v_cache2_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_56, begin_mask = v_cache2_internal_tensor_assign_9_begin_mask_0, end = concat_57, end_mask = v_cache2_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_9_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_43)[name = string("v_cache2_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_9_cast_fp16, input = v_cache2)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_45 = read_state(input = v_cache2)[name = string("coreml_update_state_45")]; + tensor var_397_to_fp16 = const()[name = string("op_397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29508480)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_397_to_fp16, x = audio_data)[name = string("linear_18_cast_fp16")]; + tensor var_401_to_fp16 = const()[name = string("op_401_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30688192)))]; + tensor var_402_to_fp16 = const()[name = string("op_402_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31867904)))]; + tensor linear_19_cast_fp16 = linear(bias = var_402_to_fp16, weight = var_401_to_fp16, x = audio_data)[name = string("linear_19_cast_fp16")]; + tensor var_404_shape_cast_fp16 = shape(x = linear_18_cast_fp16)[name = string("op_404_shape_cast_fp16")]; + int32 gather_18_axis_0 = const()[name = string("gather_18_axis_0"), val = int32(0)]; + int32 gather_18_batch_dims_0 = const()[name = string("gather_18_batch_dims_0"), val = int32(0)]; + bool gather_18_validate_indices_0 = const()[name = string("gather_18_validate_indices_0"), val = bool(false)]; + string var_404_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_404_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_18_to_uint16 = const()[name = string("select_18_to_uint16"), val = uint16(1)]; + tensor var_404_shape_cast_fp16_to_uint16 = cast(dtype = var_404_shape_cast_fp16_to_uint16_dtype_0, x = var_404_shape_cast_fp16)[name = string("cast_43")]; + uint16 gather_18_cast_uint16 = gather(axis = gather_18_axis_0, batch_dims = gather_18_batch_dims_0, indices = select_18_to_uint16, validate_indices = gather_18_validate_indices_0, x = var_404_shape_cast_fp16_to_uint16)[name = string("gather_18_cast_uint16")]; + string gather_18_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_18_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + int32 gather_18_cast_uint16_to_int32 = cast(dtype = gather_18_cast_uint16_to_int32_dtype_0, x = gather_18_cast_uint16)[name = string("cast_42")]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = gather_18_cast_uint16_to_int32)[name = string("expand_dims_83")]; + tensor concat_59 = const()[name = string("concat_59"), val = tensor([9, 0, 0, 0])]; + tensor concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = tensor([0])]; + tensor concat_60_values1_0 = const()[name = string("concat_60_values1_0"), val = tensor([0])]; + tensor concat_60_values3_0 = const()[name = string("concat_60_values3_0"), val = tensor([0])]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, concat_60_values1_0, expand_dims_83, concat_60_values3_0))[name = string("concat_60")]; + tensor k_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_59, begin_mask = k_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_60, end_mask = k_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_10_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_44)[name = string("k_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_10_cast_fp16, input = k_cache2)[name = string("coreml_update_state_46_write_state")]; + tensor coreml_update_state_46 = read_state(input = k_cache2)[name = string("coreml_update_state_46")]; + tensor var_409_shape_cast_fp16 = shape(x = linear_19_cast_fp16)[name = string("op_409_shape_cast_fp16")]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_409_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_409_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(1)]; + tensor var_409_shape_cast_fp16_to_uint16 = cast(dtype = var_409_shape_cast_fp16_to_uint16_dtype_0, x = var_409_shape_cast_fp16)[name = string("cast_41")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_409_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_87_axes_0 = const()[name = string("expand_dims_87_axes_0"), val = tensor([0])]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_40")]; + tensor expand_dims_87 = expand_dims(axes = expand_dims_87_axes_0, x = gather_19_cast_uint16_to_int32)[name = string("expand_dims_87")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([9, 0, 0, 0])]; + tensor concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor([0])]; + tensor concat_63_values1_0 = const()[name = string("concat_63_values1_0"), val = tensor([0])]; + tensor concat_63_values3_0 = const()[name = string("concat_63_values3_0"), val = tensor([0])]; + int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; + bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; + tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, concat_63_values1_0, expand_dims_87, concat_63_values3_0))[name = string("concat_63")]; + tensor v_cache2_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_62, begin_mask = v_cache2_internal_tensor_assign_10_begin_mask_0, end = concat_63, end_mask = v_cache2_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_10_stride_0, update = linear_19_cast_fp16, x = coreml_update_state_45)[name = string("v_cache2_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_10_cast_fp16, input = v_cache2)[name = string("coreml_update_state_47_write_state")]; + tensor coreml_update_state_47 = read_state(input = v_cache2)[name = string("coreml_update_state_47")]; + tensor var_431_to_fp16 = const()[name = string("op_431_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31869504)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_431_to_fp16, x = audio_data)[name = string("linear_20_cast_fp16")]; + tensor var_435_to_fp16 = const()[name = string("op_435_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33049216)))]; + tensor var_436_to_fp16 = const()[name = string("op_436_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34228928)))]; + tensor linear_21_cast_fp16 = linear(bias = var_436_to_fp16, weight = var_435_to_fp16, x = audio_data)[name = string("linear_21_cast_fp16")]; + tensor var_438_shape_cast_fp16 = shape(x = linear_20_cast_fp16)[name = string("op_438_shape_cast_fp16")]; + int32 gather_20_axis_0 = const()[name = string("gather_20_axis_0"), val = int32(0)]; + int32 gather_20_batch_dims_0 = const()[name = string("gather_20_batch_dims_0"), val = int32(0)]; + bool gather_20_validate_indices_0 = const()[name = string("gather_20_validate_indices_0"), val = bool(false)]; + string var_438_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_438_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_20_to_uint16 = const()[name = string("select_20_to_uint16"), val = uint16(1)]; + tensor var_438_shape_cast_fp16_to_uint16 = cast(dtype = var_438_shape_cast_fp16_to_uint16_dtype_0, x = var_438_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_20_cast_uint16 = gather(axis = gather_20_axis_0, batch_dims = gather_20_batch_dims_0, indices = select_20_to_uint16, validate_indices = gather_20_validate_indices_0, x = var_438_shape_cast_fp16_to_uint16)[name = string("gather_20_cast_uint16")]; + string gather_20_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_20_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_91_axes_0 = const()[name = string("expand_dims_91_axes_0"), val = tensor([0])]; + int32 gather_20_cast_uint16_to_int32 = cast(dtype = gather_20_cast_uint16_to_int32_dtype_0, x = gather_20_cast_uint16)[name = string("cast_38")]; + tensor expand_dims_91 = expand_dims(axes = expand_dims_91_axes_0, x = gather_20_cast_uint16_to_int32)[name = string("expand_dims_91")]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([10, 0, 0, 0])]; + tensor concat_66_values0_0 = const()[name = string("concat_66_values0_0"), val = tensor([0])]; + tensor concat_66_values1_0 = const()[name = string("concat_66_values1_0"), val = tensor([0])]; + tensor concat_66_values3_0 = const()[name = string("concat_66_values3_0"), val = tensor([0])]; + int32 concat_66_axis_0 = const()[name = string("concat_66_axis_0"), val = int32(0)]; + bool concat_66_interleave_0 = const()[name = string("concat_66_interleave_0"), val = bool(false)]; + tensor concat_66 = concat(axis = concat_66_axis_0, interleave = concat_66_interleave_0, values = (concat_66_values0_0, concat_66_values1_0, expand_dims_91, concat_66_values3_0))[name = string("concat_66")]; + tensor k_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_65, begin_mask = k_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_66, end_mask = k_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_11_stride_0, update = linear_20_cast_fp16, x = coreml_update_state_46)[name = string("k_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_11_cast_fp16, input = k_cache2)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_48 = read_state(input = k_cache2)[name = string("coreml_update_state_48")]; + tensor var_443_shape_cast_fp16 = shape(x = linear_21_cast_fp16)[name = string("op_443_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_443_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_443_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(1)]; + tensor var_443_shape_cast_fp16_to_uint16 = cast(dtype = var_443_shape_cast_fp16_to_uint16_dtype_0, x = var_443_shape_cast_fp16)[name = string("cast_37")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_443_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_95_axes_0 = const()[name = string("expand_dims_95_axes_0"), val = tensor([0])]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_36")]; + tensor expand_dims_95 = expand_dims(axes = expand_dims_95_axes_0, x = gather_21_cast_uint16_to_int32)[name = string("expand_dims_95")]; + tensor concat_68 = const()[name = string("concat_68"), val = tensor([10, 0, 0, 0])]; + tensor concat_69_values0_0 = const()[name = string("concat_69_values0_0"), val = tensor([0])]; + tensor concat_69_values1_0 = const()[name = string("concat_69_values1_0"), val = tensor([0])]; + tensor concat_69_values3_0 = const()[name = string("concat_69_values3_0"), val = tensor([0])]; + int32 concat_69_axis_0 = const()[name = string("concat_69_axis_0"), val = int32(0)]; + bool concat_69_interleave_0 = const()[name = string("concat_69_interleave_0"), val = bool(false)]; + tensor concat_69 = concat(axis = concat_69_axis_0, interleave = concat_69_interleave_0, values = (concat_69_values0_0, concat_69_values1_0, expand_dims_95, concat_69_values3_0))[name = string("concat_69")]; + tensor v_cache2_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_68, begin_mask = v_cache2_internal_tensor_assign_11_begin_mask_0, end = concat_69, end_mask = v_cache2_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_11_stride_0, update = linear_21_cast_fp16, x = coreml_update_state_47)[name = string("v_cache2_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_11_cast_fp16, input = v_cache2)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_49 = read_state(input = v_cache2)[name = string("coreml_update_state_49")]; + tensor var_465_to_fp16 = const()[name = string("op_465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34230528)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_465_to_fp16, x = audio_data)[name = string("linear_22_cast_fp16")]; + tensor var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35410240)))]; + tensor var_470_to_fp16 = const()[name = string("op_470_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36589952)))]; + tensor linear_23_cast_fp16 = linear(bias = var_470_to_fp16, weight = var_469_to_fp16, x = audio_data)[name = string("linear_23_cast_fp16")]; + tensor var_472_shape_cast_fp16 = shape(x = linear_22_cast_fp16)[name = string("op_472_shape_cast_fp16")]; + int32 gather_22_axis_0 = const()[name = string("gather_22_axis_0"), val = int32(0)]; + int32 gather_22_batch_dims_0 = const()[name = string("gather_22_batch_dims_0"), val = int32(0)]; + bool gather_22_validate_indices_0 = const()[name = string("gather_22_validate_indices_0"), val = bool(false)]; + string var_472_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_472_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_22_to_uint16 = const()[name = string("select_22_to_uint16"), val = uint16(1)]; + tensor var_472_shape_cast_fp16_to_uint16 = cast(dtype = var_472_shape_cast_fp16_to_uint16_dtype_0, x = var_472_shape_cast_fp16)[name = string("cast_35")]; + uint16 gather_22_cast_uint16 = gather(axis = gather_22_axis_0, batch_dims = gather_22_batch_dims_0, indices = select_22_to_uint16, validate_indices = gather_22_validate_indices_0, x = var_472_shape_cast_fp16_to_uint16)[name = string("gather_22_cast_uint16")]; + string gather_22_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_22_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + int32 gather_22_cast_uint16_to_int32 = cast(dtype = gather_22_cast_uint16_to_int32_dtype_0, x = gather_22_cast_uint16)[name = string("cast_34")]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = gather_22_cast_uint16_to_int32)[name = string("expand_dims_99")]; + tensor concat_71 = const()[name = string("concat_71"), val = tensor([11, 0, 0, 0])]; + tensor concat_72_values0_0 = const()[name = string("concat_72_values0_0"), val = tensor([0])]; + tensor concat_72_values1_0 = const()[name = string("concat_72_values1_0"), val = tensor([0])]; + tensor concat_72_values3_0 = const()[name = string("concat_72_values3_0"), val = tensor([0])]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (concat_72_values0_0, concat_72_values1_0, expand_dims_99, concat_72_values3_0))[name = string("concat_72")]; + tensor k_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_71, begin_mask = k_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_72, end_mask = k_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_12_stride_0, update = linear_22_cast_fp16, x = coreml_update_state_48)[name = string("k_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_12_cast_fp16, input = k_cache2)[name = string("coreml_update_state_50_write_state")]; + tensor var_477_shape_cast_fp16 = shape(x = linear_23_cast_fp16)[name = string("op_477_shape_cast_fp16")]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_477_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_477_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_477_shape_cast_fp16_to_uint16 = cast(dtype = var_477_shape_cast_fp16_to_uint16_dtype_0, x = var_477_shape_cast_fp16)[name = string("cast_33")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_477_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_103_axes_0 = const()[name = string("expand_dims_103_axes_0"), val = tensor([0])]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_32")]; + tensor expand_dims_103 = expand_dims(axes = expand_dims_103_axes_0, x = gather_23_cast_uint16_to_int32)[name = string("expand_dims_103")]; + tensor concat_74 = const()[name = string("concat_74"), val = tensor([11, 0, 0, 0])]; + tensor concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = tensor([0])]; + tensor concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor([0])]; + tensor concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor([0])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, expand_dims_103, concat_75_values3_0))[name = string("concat_75")]; + tensor v_cache2_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_74, begin_mask = v_cache2_internal_tensor_assign_12_begin_mask_0, end = concat_75, end_mask = v_cache2_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_12_stride_0, update = linear_23_cast_fp16, x = coreml_update_state_49)[name = string("v_cache2_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_12_cast_fp16, input = v_cache2)[name = string("coreml_update_state_51_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/small/decoder_first.mlmodelc/weights/weight.bin b/small/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..10b4ed02bb0d11bb4330f010cb389bbb65df12f5 --- /dev/null +++ b/small/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3059d6670bb065c1eefeff01f7c5496af03cd5f48621357792473c5b63044b3e +size 36591552 diff --git a/small/decoder_second.mlmodelc/analytics/coremldata.bin b/small/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..91ade0def0e2444c4273db0aaf39fad3ca7f7067 --- /dev/null +++ b/small/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b477d0308b85c7fa966f41c8bb93a37823206812e444a48a26fb153629700dd9 +size 243 diff --git a/small/decoder_second.mlmodelc/coremldata.bin b/small/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f2ca8662614ee4c829f6f3eb183306165e77122 --- /dev/null +++ b/small/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e102e9f0547bcdce522991060c135b72b7dfeca80443bb17033816100bc5841 +size 487 diff --git a/small/decoder_second.mlmodelc/metadata.json b/small/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..90f741660187450796a7f8b2a43fa2fa75181984 --- /dev/null +++ b/small/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 97, + "Ios18.readState" : 26, + "Ios18.expandDims" : 13, + "Ios18.sub" : 1, + "Ios18.matmul" : 48, + "Ios18.gelu" : 12, + "Ios18.gather" : 15, + "Ios18.concat" : 62, + "Shape" : 14, + "Ios18.add" : 61, + "Ios18.sliceUpdate" : 48, + "Ios18.sliceByIndex" : 97, + "Ios18.layerNorm" : 37, + "Ios18.cast" : 28, + "Ios18.transpose" : 96, + "Ios18.writeState" : 24, + "Ios18.reshape" : 96, + "Ios18.softmax" : 24, + "Ios18.mul" : 48 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 448 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 448, 768]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 448 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 448, 768]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 1500 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 1500, 768]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 12 × 1 × 1500 × 768)", + "shortDescription" : "", + "shape" : "[12, 1, 1500, 768]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/small/decoder_second.mlmodelc/model.mil b/small/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..0cda46e1dd0c22cd1586d0e31cc8cc229178acfc --- /dev/null +++ b/small/decoder_second.mlmodelc/model.mil @@ -0,0 +1,2398 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_38_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_38_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_38_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_38_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_38_shape_cast_fp16_to_int16 = cast(dtype = var_38_shape_cast_fp16_to_int16_dtype_0, x = var_38_shape_cast_fp16)[name = string("cast_154")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_38_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_42_shape = shape(x = token_data)[name = string("op_42_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_42_shape_to_uint16_dtype_0 = const()[name = string("op_42_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_42_shape_to_uint16 = cast(dtype = var_42_shape_to_uint16_dtype_0, x = var_42_shape)[name = string("cast_152")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_42_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_151")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_153")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_74_axis_0 = const()[name = string("op_74_axis_0"), val = int32(0)]; + int32 var_74_batch_dims_0 = const()[name = string("op_74_batch_dims_0"), val = int32(0)]; + bool var_74_validate_indices_0 = const()[name = string("op_74_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_74_cast_fp16 = gather(axis = var_74_axis_0, batch_dims = var_74_batch_dims_0, indices = token_data, validate_indices = var_74_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_74_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(768)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_77_end_mask_0 = const()[name = string("op_77_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79664768)))]; + tensor var_77_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_77_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_77_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_74_cast_fp16, y = var_77_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 768])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 768])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 768])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 768])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_100 = const()[name = string("op_100"), val = int32(-1)]; + tensor var_118_axes_0 = const()[name = string("op_118_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80352960)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80354560)))]; + fp16 var_106_to_fp16 = const()[name = string("op_106_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_118_cast_fp16 = layer_norm(axes = var_118_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_118_cast_fp16")]; + tensor var_129_to_fp16 = const()[name = string("op_129_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80356160)))]; + tensor var_130_to_fp16 = const()[name = string("op_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81535872)))]; + tensor linear_0_cast_fp16 = linear(bias = var_130_to_fp16, weight = var_129_to_fp16, x = var_118_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_133_to_fp16 = const()[name = string("op_133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81537472)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82717184)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_133_to_fp16, x = var_118_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_137_to_fp16 = const()[name = string("op_137_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82718784)))]; + tensor var_138_to_fp16 = const()[name = string("op_138_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83898496)))]; + tensor linear_2_cast_fp16 = linear(bias = var_138_to_fp16, weight = var_137_to_fp16, x = var_118_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_140_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_140_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_140_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_140_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_140_shape_cast_fp16_to_uint16 = cast(dtype = var_140_shape_cast_fp16_to_uint16_dtype_0, x = var_140_shape_cast_fp16)[name = string("cast_150")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_140_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_149")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = k_cache1)[name = string("coreml_update_state_24")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = v_cache1)[name = string("coreml_update_state_25")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(768)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_156_begin_0 = const()[name = string("op_156_begin_0"), val = tensor([0, 0, 0])]; + tensor var_156_end_mask_0 = const()[name = string("op_156_end_mask_0"), val = tensor([true, false, true])]; + tensor var_156_cast_fp16 = slice_by_index(begin = var_156_begin_0, end = concat_10, end_mask = var_156_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_156_cast_fp16")]; + tensor var_159_begin_0 = const()[name = string("op_159_begin_0"), val = tensor([0, 0, 0])]; + tensor var_159_end_mask_0 = const()[name = string("op_159_end_mask_0"), val = tensor([true, false, true])]; + tensor var_159_cast_fp16 = slice_by_index(begin = var_159_begin_0, end = concat_10, end_mask = var_159_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_159_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 12, 64])]; + tensor var_169_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_169_cast_fp16")]; + tensor const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_169_cast_fp16, y = const_60_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 12, 64])]; + tensor var_176_cast_fp16 = reshape(shape = concat_13x, x = var_156_cast_fp16)[name = string("op_176_cast_fp16")]; + tensor const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_176_cast_fp16, y = const_61_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 12, 64])]; + tensor var_183_cast_fp16 = reshape(shape = concat_14x, x = var_159_cast_fp16)[name = string("op_183_cast_fp16")]; + tensor var_184 = const()[name = string("op_184"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = k_5_cast_fp16)[name = string("transpose_238")]; + tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = q_3_cast_fp16)[name = string("transpose_239")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_97, y = transpose_98)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_187_begin_0 = const()[name = string("op_187_begin_0"), val = tensor([0, 0])]; + tensor var_187_end_mask_0 = const()[name = string("op_187_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83900096)))]; + tensor var_187_cast_fp16 = slice_by_index(begin = var_187_begin_0, end = concat_15, end_mask = var_187_end_mask_0, x = mask_to_fp16)[name = string("op_187_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_188_begin_0 = const()[name = string("op_188_begin_0"), val = tensor([0, 0])]; + tensor var_188_end_mask_0 = const()[name = string("op_188_end_mask_0"), val = tensor([true, false])]; + tensor var_188_cast_fp16 = slice_by_index(begin = var_188_begin_0, end = concat_16, end_mask = var_188_end_mask_0, x = var_187_cast_fp16)[name = string("op_188_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_188_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_191_cast_fp16 = softmax(axis = var_100, x = qk_3_cast_fp16)[name = string("op_191_cast_fp16")]; + bool var_193_transpose_x_0 = const()[name = string("op_193_transpose_x_0"), val = bool(false)]; + bool var_193_transpose_y_0 = const()[name = string("op_193_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_184, x = var_183_cast_fp16)[name = string("transpose_240")]; + tensor var_193_cast_fp16 = matmul(transpose_x = var_193_transpose_x_0, transpose_y = var_193_transpose_y_0, x = var_191_cast_fp16, y = v_5_cast_fp16)[name = string("op_193_cast_fp16")]; + tensor var_194 = const()[name = string("op_194"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 768])]; + tensor var_195_cast_fp16 = transpose(perm = var_194, x = var_193_cast_fp16)[name = string("transpose_237")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_195_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84301568)))]; + tensor var_200_to_fp16 = const()[name = string("op_200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85481280)))]; + tensor linear_3_cast_fp16 = linear(bias = var_200_to_fp16, weight = var_199_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_207_axes_0 = const()[name = string("op_207_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85482880)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85484480)))]; + tensor var_207_cast_fp16 = layer_norm(axes = var_207_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_207_cast_fp16")]; + tensor var_216_to_fp16 = const()[name = string("op_216_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85486080)))]; + tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86665792)))]; + tensor linear_4_cast_fp16 = linear(bias = var_217_to_fp16, weight = var_216_to_fp16, x = var_207_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86667392)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 12, 64])]; + tensor var_237_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_237_cast_fp16")]; + tensor const_62_to_fp16 = const()[name = string("const_62_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_237_cast_fp16, y = const_62_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_243 = const()[name = string("op_243"), val = tensor([1, 1500, 12, -1])]; + tensor var_244_cast_fp16 = reshape(shape = var_243, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_244_cast_fp16")]; + tensor const_63_to_fp16 = const()[name = string("const_63_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_244_cast_fp16, y = const_63_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_250 = const()[name = string("op_250"), val = tensor([1, 1500, 12, -1])]; + tensor var_251_cast_fp16 = reshape(shape = var_250, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_251_cast_fp16")]; + tensor var_252 = const()[name = string("op_252"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = k_9_cast_fp16)[name = string("transpose_234")]; + tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = q_7_cast_fp16)[name = string("transpose_235")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_99, y = transpose_100)[name = string("qk_5_cast_fp16")]; + tensor var_256_cast_fp16 = softmax(axis = var_100, x = qk_5_cast_fp16)[name = string("op_256_cast_fp16")]; + bool var_258_transpose_x_0 = const()[name = string("op_258_transpose_x_0"), val = bool(false)]; + bool var_258_transpose_y_0 = const()[name = string("op_258_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_252, x = var_251_cast_fp16)[name = string("transpose_236")]; + tensor var_258_cast_fp16 = matmul(transpose_x = var_258_transpose_x_0, transpose_y = var_258_transpose_y_0, x = var_256_cast_fp16, y = v_9_cast_fp16)[name = string("op_258_cast_fp16")]; + tensor var_259 = const()[name = string("op_259"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 768])]; + tensor var_260_cast_fp16 = transpose(perm = var_259, x = var_258_cast_fp16)[name = string("transpose_233")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_260_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_264_to_fp16 = const()[name = string("op_264_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88971456)))]; + tensor var_265_to_fp16 = const()[name = string("op_265_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90151168)))]; + tensor linear_5_cast_fp16 = linear(bias = var_265_to_fp16, weight = var_264_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_272_axes_0 = const()[name = string("op_272_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90152768)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90154368)))]; + tensor var_272_cast_fp16 = layer_norm(axes = var_272_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_106_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_272_cast_fp16")]; + tensor var_281_to_fp16 = const()[name = string("op_281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90155968)))]; + tensor var_282_to_fp16 = const()[name = string("op_282_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94874624)))]; + tensor linear_6_cast_fp16 = linear(bias = var_282_to_fp16, weight = var_281_to_fp16, x = var_272_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_287_to_fp16 = const()[name = string("op_287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94880832)))]; + tensor var_288_to_fp16 = const()[name = string("op_288_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99599488)))]; + tensor linear_7_cast_fp16 = linear(bias = var_288_to_fp16, weight = var_287_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 768])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_24)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 768])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_25)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 768])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 768])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_311 = const()[name = string("op_311"), val = int32(-1)]; + tensor var_329_axes_0 = const()[name = string("op_329_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99601088)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99602688)))]; + fp16 var_317_to_fp16 = const()[name = string("op_317_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_329_cast_fp16 = layer_norm(axes = var_329_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_329_cast_fp16")]; + tensor var_340_to_fp16 = const()[name = string("op_340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99604288)))]; + tensor var_341_to_fp16 = const()[name = string("op_341_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100784000)))]; + tensor linear_8_cast_fp16 = linear(bias = var_341_to_fp16, weight = var_340_to_fp16, x = var_329_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_344_to_fp16 = const()[name = string("op_344_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100785600)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_344_to_fp16, x = var_329_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101965312)))]; + tensor var_349_to_fp16 = const()[name = string("op_349_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103145024)))]; + tensor linear_10_cast_fp16 = linear(bias = var_349_to_fp16, weight = var_348_to_fp16, x = var_329_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_351_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_351_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_351_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_351_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_351_shape_cast_fp16_to_uint16 = cast(dtype = var_351_shape_cast_fp16_to_uint16_dtype_0, x = var_351_shape_cast_fp16)[name = string("cast_148")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_351_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_147")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_24)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = k_cache1)[name = string("coreml_update_state_26")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_25)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = v_cache1)[name = string("coreml_update_state_27")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(768)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_367_begin_0 = const()[name = string("op_367_begin_0"), val = tensor([0, 0, 0])]; + tensor var_367_end_mask_0 = const()[name = string("op_367_end_mask_0"), val = tensor([true, false, true])]; + tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = concat_32, end_mask = var_367_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_367_cast_fp16")]; + tensor var_370_begin_0 = const()[name = string("op_370_begin_0"), val = tensor([0, 0, 0])]; + tensor var_370_end_mask_0 = const()[name = string("op_370_end_mask_0"), val = tensor([true, false, true])]; + tensor var_370_cast_fp16 = slice_by_index(begin = var_370_begin_0, end = concat_32, end_mask = var_370_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 12, 64])]; + tensor var_380_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_380_cast_fp16")]; + tensor const_64_to_fp16 = const()[name = string("const_64_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_380_cast_fp16, y = const_64_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 12, 64])]; + tensor var_387_cast_fp16 = reshape(shape = concat_35x, x = var_367_cast_fp16)[name = string("op_387_cast_fp16")]; + tensor const_65_to_fp16 = const()[name = string("const_65_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_387_cast_fp16, y = const_65_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 12, 64])]; + tensor var_394_cast_fp16 = reshape(shape = concat_36x, x = var_370_cast_fp16)[name = string("op_394_cast_fp16")]; + tensor var_395 = const()[name = string("op_395"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = k_15_cast_fp16)[name = string("transpose_230")]; + tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = q_11_cast_fp16)[name = string("transpose_231")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_101, y = transpose_102)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_398_begin_0 = const()[name = string("op_398_begin_0"), val = tensor([0, 0])]; + tensor var_398_end_mask_0 = const()[name = string("op_398_end_mask_0"), val = tensor([false, true])]; + tensor var_398_cast_fp16 = slice_by_index(begin = var_398_begin_0, end = concat_37, end_mask = var_398_end_mask_0, x = mask_to_fp16)[name = string("op_398_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_399_begin_0 = const()[name = string("op_399_begin_0"), val = tensor([0, 0])]; + tensor var_399_end_mask_0 = const()[name = string("op_399_end_mask_0"), val = tensor([true, false])]; + tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = concat_38, end_mask = var_399_end_mask_0, x = var_398_cast_fp16)[name = string("op_399_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_399_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_402_cast_fp16 = softmax(axis = var_311, x = qk_9_cast_fp16)[name = string("op_402_cast_fp16")]; + bool var_404_transpose_x_0 = const()[name = string("op_404_transpose_x_0"), val = bool(false)]; + bool var_404_transpose_y_0 = const()[name = string("op_404_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_395, x = var_394_cast_fp16)[name = string("transpose_232")]; + tensor var_404_cast_fp16 = matmul(transpose_x = var_404_transpose_x_0, transpose_y = var_404_transpose_y_0, x = var_402_cast_fp16, y = v_15_cast_fp16)[name = string("op_404_cast_fp16")]; + tensor var_405 = const()[name = string("op_405"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 768])]; + tensor var_406_cast_fp16 = transpose(perm = var_405, x = var_404_cast_fp16)[name = string("transpose_229")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_406_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103146624)))]; + tensor var_411_to_fp16 = const()[name = string("op_411_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104326336)))]; + tensor linear_11_cast_fp16 = linear(bias = var_411_to_fp16, weight = var_410_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104327936)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104329536)))]; + tensor var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_427_to_fp16 = const()[name = string("op_427_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104331136)))]; + tensor var_428_to_fp16 = const()[name = string("op_428_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105510848)))]; + tensor linear_12_cast_fp16 = linear(bias = var_428_to_fp16, weight = var_427_to_fp16, x = var_418_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 12, 64])]; + tensor var_448_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_448_cast_fp16")]; + tensor const_66_to_fp16 = const()[name = string("const_66_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_448_cast_fp16, y = const_66_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_454 = const()[name = string("op_454"), val = tensor([1, 1500, 12, -1])]; + tensor var_455_cast_fp16 = reshape(shape = var_454, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_455_cast_fp16")]; + tensor const_67_to_fp16 = const()[name = string("const_67_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_455_cast_fp16, y = const_67_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_461 = const()[name = string("op_461"), val = tensor([1, 1500, 12, -1])]; + tensor var_462_cast_fp16 = reshape(shape = var_461, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_462_cast_fp16")]; + tensor var_463 = const()[name = string("op_463"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = k_19_cast_fp16)[name = string("transpose_226")]; + tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = q_15_cast_fp16)[name = string("transpose_227")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_103, y = transpose_104)[name = string("qk_11_cast_fp16")]; + tensor var_467_cast_fp16 = softmax(axis = var_311, x = qk_11_cast_fp16)[name = string("op_467_cast_fp16")]; + bool var_469_transpose_x_0 = const()[name = string("op_469_transpose_x_0"), val = bool(false)]; + bool var_469_transpose_y_0 = const()[name = string("op_469_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_463, x = var_462_cast_fp16)[name = string("transpose_228")]; + tensor var_469_cast_fp16 = matmul(transpose_x = var_469_transpose_x_0, transpose_y = var_469_transpose_y_0, x = var_467_cast_fp16, y = v_19_cast_fp16)[name = string("op_469_cast_fp16")]; + tensor var_470 = const()[name = string("op_470"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 768])]; + tensor var_471_cast_fp16 = transpose(perm = var_470, x = var_469_cast_fp16)[name = string("transpose_225")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_471_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105512448)))]; + tensor var_476_to_fp16 = const()[name = string("op_476_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106692160)))]; + tensor linear_13_cast_fp16 = linear(bias = var_476_to_fp16, weight = var_475_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_483_axes_0 = const()[name = string("op_483_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106693760)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106695360)))]; + tensor var_483_cast_fp16 = layer_norm(axes = var_483_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_317_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_483_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106696960)))]; + tensor var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111415616)))]; + tensor linear_14_cast_fp16 = linear(bias = var_493_to_fp16, weight = var_492_to_fp16, x = var_483_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111421824)))]; + tensor var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116140480)))]; + tensor linear_15_cast_fp16 = linear(bias = var_499_to_fp16, weight = var_498_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 768])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_26)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 768])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_27)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 768])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 768])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_522 = const()[name = string("op_522"), val = int32(-1)]; + tensor var_540_axes_0 = const()[name = string("op_540_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116142080)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116143680)))]; + fp16 var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_540_cast_fp16 = layer_norm(axes = var_540_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_540_cast_fp16")]; + tensor var_551_to_fp16 = const()[name = string("op_551_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116145280)))]; + tensor var_552_to_fp16 = const()[name = string("op_552_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117324992)))]; + tensor linear_16_cast_fp16 = linear(bias = var_552_to_fp16, weight = var_551_to_fp16, x = var_540_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_555_to_fp16 = const()[name = string("op_555_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117326592)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_555_to_fp16, x = var_540_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_559_to_fp16 = const()[name = string("op_559_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118506304)))]; + tensor var_560_to_fp16 = const()[name = string("op_560_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119686016)))]; + tensor linear_18_cast_fp16 = linear(bias = var_560_to_fp16, weight = var_559_to_fp16, x = var_540_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_562_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_562_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_562_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_562_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_562_shape_cast_fp16_to_uint16 = cast(dtype = var_562_shape_cast_fp16_to_uint16_dtype_0, x = var_562_shape_cast_fp16)[name = string("cast_146")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_562_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_145")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_26)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = k_cache1)[name = string("coreml_update_state_28")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_27)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = v_cache1)[name = string("coreml_update_state_29")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(768)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_578_begin_0 = const()[name = string("op_578_begin_0"), val = tensor([0, 0, 0])]; + tensor var_578_end_mask_0 = const()[name = string("op_578_end_mask_0"), val = tensor([true, false, true])]; + tensor var_578_cast_fp16 = slice_by_index(begin = var_578_begin_0, end = concat_54, end_mask = var_578_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_578_cast_fp16")]; + tensor var_581_begin_0 = const()[name = string("op_581_begin_0"), val = tensor([0, 0, 0])]; + tensor var_581_end_mask_0 = const()[name = string("op_581_end_mask_0"), val = tensor([true, false, true])]; + tensor var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = concat_54, end_mask = var_581_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_581_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 12, 64])]; + tensor var_591_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_591_cast_fp16")]; + tensor const_68_to_fp16 = const()[name = string("const_68_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_591_cast_fp16, y = const_68_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 12, 64])]; + tensor var_598_cast_fp16 = reshape(shape = concat_57x, x = var_578_cast_fp16)[name = string("op_598_cast_fp16")]; + tensor const_69_to_fp16 = const()[name = string("const_69_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_598_cast_fp16, y = const_69_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 12, 64])]; + tensor var_605_cast_fp16 = reshape(shape = concat_58x, x = var_581_cast_fp16)[name = string("op_605_cast_fp16")]; + tensor var_606 = const()[name = string("op_606"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = k_25_cast_fp16)[name = string("transpose_222")]; + tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = q_19_cast_fp16)[name = string("transpose_223")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_105, y = transpose_106)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_609_begin_0 = const()[name = string("op_609_begin_0"), val = tensor([0, 0])]; + tensor var_609_end_mask_0 = const()[name = string("op_609_end_mask_0"), val = tensor([false, true])]; + tensor var_609_cast_fp16 = slice_by_index(begin = var_609_begin_0, end = concat_59, end_mask = var_609_end_mask_0, x = mask_to_fp16)[name = string("op_609_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_610_begin_0 = const()[name = string("op_610_begin_0"), val = tensor([0, 0])]; + tensor var_610_end_mask_0 = const()[name = string("op_610_end_mask_0"), val = tensor([true, false])]; + tensor var_610_cast_fp16 = slice_by_index(begin = var_610_begin_0, end = concat_60, end_mask = var_610_end_mask_0, x = var_609_cast_fp16)[name = string("op_610_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_610_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_522, x = qk_15_cast_fp16)[name = string("op_613_cast_fp16")]; + bool var_615_transpose_x_0 = const()[name = string("op_615_transpose_x_0"), val = bool(false)]; + bool var_615_transpose_y_0 = const()[name = string("op_615_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_606, x = var_605_cast_fp16)[name = string("transpose_224")]; + tensor var_615_cast_fp16 = matmul(transpose_x = var_615_transpose_x_0, transpose_y = var_615_transpose_y_0, x = var_613_cast_fp16, y = v_25_cast_fp16)[name = string("op_615_cast_fp16")]; + tensor var_616 = const()[name = string("op_616"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 768])]; + tensor var_617_cast_fp16 = transpose(perm = var_616, x = var_615_cast_fp16)[name = string("transpose_221")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_617_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_621_to_fp16 = const()[name = string("op_621_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119687616)))]; + tensor var_622_to_fp16 = const()[name = string("op_622_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120867328)))]; + tensor linear_19_cast_fp16 = linear(bias = var_622_to_fp16, weight = var_621_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_629_axes_0 = const()[name = string("op_629_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120868928)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120870528)))]; + tensor var_629_cast_fp16 = layer_norm(axes = var_629_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_629_cast_fp16")]; + tensor var_638_to_fp16 = const()[name = string("op_638_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120872128)))]; + tensor var_639_to_fp16 = const()[name = string("op_639_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122051840)))]; + tensor linear_20_cast_fp16 = linear(bias = var_639_to_fp16, weight = var_638_to_fp16, x = var_629_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 12, 64])]; + tensor var_659_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_659_cast_fp16")]; + tensor const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_70_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_665 = const()[name = string("op_665"), val = tensor([1, 1500, 12, -1])]; + tensor var_666_cast_fp16 = reshape(shape = var_665, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_666_cast_fp16, y = const_71_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_672 = const()[name = string("op_672"), val = tensor([1, 1500, 12, -1])]; + tensor var_673_cast_fp16 = reshape(shape = var_672, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_673_cast_fp16")]; + tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = k_29_cast_fp16)[name = string("transpose_218")]; + tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = q_23_cast_fp16)[name = string("transpose_219")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_107, y = transpose_108)[name = string("qk_17_cast_fp16")]; + tensor var_678_cast_fp16 = softmax(axis = var_522, x = qk_17_cast_fp16)[name = string("op_678_cast_fp16")]; + bool var_680_transpose_x_0 = const()[name = string("op_680_transpose_x_0"), val = bool(false)]; + bool var_680_transpose_y_0 = const()[name = string("op_680_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_674, x = var_673_cast_fp16)[name = string("transpose_220")]; + tensor var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = v_29_cast_fp16)[name = string("op_680_cast_fp16")]; + tensor var_681 = const()[name = string("op_681"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 768])]; + tensor var_682_cast_fp16 = transpose(perm = var_681, x = var_680_cast_fp16)[name = string("transpose_217")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_682_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122053440)))]; + tensor var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123233152)))]; + tensor linear_21_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123234752)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123236352)))]; + tensor var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_528_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_694_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123237952)))]; + tensor var_704_to_fp16 = const()[name = string("op_704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127956608)))]; + tensor linear_22_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127962816)))]; + tensor var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132681472)))]; + tensor linear_23_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 768])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_28)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 768])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_29)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_15_begin_0 = const()[name = string("k_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_15_end_0 = const()[name = string("k_cache_15_end_0"), val = tensor([4, 1, 1500, 768])]; + tensor k_cache_15_end_mask_0 = const()[name = string("k_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_15_squeeze_mask_0 = const()[name = string("k_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_15_cast_fp16 = slice_by_index(begin = k_cache_15_begin_0, end = k_cache_15_end_0, end_mask = k_cache_15_end_mask_0, squeeze_mask = k_cache_15_squeeze_mask_0, x = read_state_2)[name = string("k_cache_15_cast_fp16")]; + tensor v_cache_15_begin_0 = const()[name = string("v_cache_15_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_15_end_0 = const()[name = string("v_cache_15_end_0"), val = tensor([4, 1, 1500, 768])]; + tensor v_cache_15_end_mask_0 = const()[name = string("v_cache_15_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_15_squeeze_mask_0 = const()[name = string("v_cache_15_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_15_cast_fp16 = slice_by_index(begin = v_cache_15_begin_0, end = v_cache_15_end_0, end_mask = v_cache_15_end_mask_0, squeeze_mask = v_cache_15_squeeze_mask_0, x = read_state_3)[name = string("v_cache_15_cast_fp16")]; + int32 var_733 = const()[name = string("op_733"), val = int32(-1)]; + tensor var_751_axes_0 = const()[name = string("op_751_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132683072)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132684672)))]; + fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_751_cast_fp16 = layer_norm(axes = var_751_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_751_cast_fp16")]; + tensor var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132686272)))]; + tensor var_763_to_fp16 = const()[name = string("op_763_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133865984)))]; + tensor linear_24_cast_fp16 = linear(bias = var_763_to_fp16, weight = var_762_to_fp16, x = var_751_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_766_to_fp16 = const()[name = string("op_766_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133867584)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_766_to_fp16, x = var_751_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_770_to_fp16 = const()[name = string("op_770_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135047296)))]; + tensor var_771_to_fp16 = const()[name = string("op_771_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136227008)))]; + tensor linear_26_cast_fp16 = linear(bias = var_771_to_fp16, weight = var_770_to_fp16, x = var_751_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_773_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_773_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_773_shape_cast_fp16_to_uint16 = cast(dtype = var_773_shape_cast_fp16_to_uint16_dtype_0, x = var_773_shape_cast_fp16)[name = string("cast_144")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_773_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_143")]; + int32 end_step_9 = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step_9")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step_9)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_28)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = k_cache1)[name = string("coreml_update_state_30")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_29)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = v_cache1)[name = string("coreml_update_state_31")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(768)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step_9, concat_76_values2_0))[name = string("concat_76")]; + tensor var_789_begin_0 = const()[name = string("op_789_begin_0"), val = tensor([0, 0, 0])]; + tensor var_789_end_mask_0 = const()[name = string("op_789_end_mask_0"), val = tensor([true, false, true])]; + tensor var_789_cast_fp16 = slice_by_index(begin = var_789_begin_0, end = concat_76, end_mask = var_789_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_789_cast_fp16")]; + tensor var_792_begin_0 = const()[name = string("op_792_begin_0"), val = tensor([0, 0, 0])]; + tensor var_792_end_mask_0 = const()[name = string("op_792_end_mask_0"), val = tensor([true, false, true])]; + tensor var_792_cast_fp16 = slice_by_index(begin = var_792_begin_0, end = concat_76, end_mask = var_792_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_792_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 12, 64])]; + tensor var_802_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_802_cast_fp16")]; + tensor const_72_to_fp16 = const()[name = string("const_72_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_802_cast_fp16, y = const_72_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 12, 64])]; + tensor var_809_cast_fp16 = reshape(shape = concat_79x, x = var_789_cast_fp16)[name = string("op_809_cast_fp16")]; + tensor const_73_to_fp16 = const()[name = string("const_73_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_809_cast_fp16, y = const_73_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 12, 64])]; + tensor var_816_cast_fp16 = reshape(shape = concat_80x, x = var_792_cast_fp16)[name = string("op_816_cast_fp16")]; + tensor var_817 = const()[name = string("op_817"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = k_35_cast_fp16)[name = string("transpose_214")]; + tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = q_27_cast_fp16)[name = string("transpose_215")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_109, y = transpose_110)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_820_begin_0 = const()[name = string("op_820_begin_0"), val = tensor([0, 0])]; + tensor var_820_end_mask_0 = const()[name = string("op_820_end_mask_0"), val = tensor([false, true])]; + tensor var_820_cast_fp16 = slice_by_index(begin = var_820_begin_0, end = concat_81, end_mask = var_820_end_mask_0, x = mask_to_fp16)[name = string("op_820_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_821_begin_0 = const()[name = string("op_821_begin_0"), val = tensor([0, 0])]; + tensor var_821_end_mask_0 = const()[name = string("op_821_end_mask_0"), val = tensor([true, false])]; + tensor var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = concat_82, end_mask = var_821_end_mask_0, x = var_820_cast_fp16)[name = string("op_821_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_821_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_824_cast_fp16 = softmax(axis = var_733, x = qk_21_cast_fp16)[name = string("op_824_cast_fp16")]; + bool var_826_transpose_x_0 = const()[name = string("op_826_transpose_x_0"), val = bool(false)]; + bool var_826_transpose_y_0 = const()[name = string("op_826_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_817, x = var_816_cast_fp16)[name = string("transpose_216")]; + tensor var_826_cast_fp16 = matmul(transpose_x = var_826_transpose_x_0, transpose_y = var_826_transpose_y_0, x = var_824_cast_fp16, y = v_35_cast_fp16)[name = string("op_826_cast_fp16")]; + tensor var_827 = const()[name = string("op_827"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 768])]; + tensor var_828_cast_fp16 = transpose(perm = var_827, x = var_826_cast_fp16)[name = string("transpose_213")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_828_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136228608)))]; + tensor var_833_to_fp16 = const()[name = string("op_833_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137408320)))]; + tensor linear_27_cast_fp16 = linear(bias = var_833_to_fp16, weight = var_832_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_840_axes_0 = const()[name = string("op_840_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137409920)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137411520)))]; + tensor var_840_cast_fp16 = layer_norm(axes = var_840_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_840_cast_fp16")]; + tensor var_849_to_fp16 = const()[name = string("op_849_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137413120)))]; + tensor var_850_to_fp16 = const()[name = string("op_850_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138592832)))]; + tensor linear_28_cast_fp16 = linear(bias = var_850_to_fp16, weight = var_849_to_fp16, x = var_840_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_15_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 12, 64])]; + tensor var_870_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_870_cast_fp16")]; + tensor const_74_to_fp16 = const()[name = string("const_74_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_870_cast_fp16, y = const_74_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_876 = const()[name = string("op_876"), val = tensor([1, 1500, 12, -1])]; + tensor var_877_cast_fp16 = reshape(shape = var_876, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_877_cast_fp16")]; + tensor const_75_to_fp16 = const()[name = string("const_75_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_877_cast_fp16, y = const_75_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_883 = const()[name = string("op_883"), val = tensor([1, 1500, 12, -1])]; + tensor var_884_cast_fp16 = reshape(shape = var_883, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_884_cast_fp16")]; + tensor var_885 = const()[name = string("op_885"), val = tensor([0, 2, 1, 3])]; + bool qk_23_transpose_x_0 = const()[name = string("qk_23_transpose_x_0"), val = bool(false)]; + bool qk_23_transpose_y_0 = const()[name = string("qk_23_transpose_y_0"), val = bool(false)]; + tensor transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = k_39_cast_fp16)[name = string("transpose_210")]; + tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = q_31_cast_fp16)[name = string("transpose_211")]; + tensor qk_23_cast_fp16 = matmul(transpose_x = qk_23_transpose_x_0, transpose_y = qk_23_transpose_y_0, x = transpose_111, y = transpose_112)[name = string("qk_23_cast_fp16")]; + tensor var_889_cast_fp16 = softmax(axis = var_733, x = qk_23_cast_fp16)[name = string("op_889_cast_fp16")]; + bool var_891_transpose_x_0 = const()[name = string("op_891_transpose_x_0"), val = bool(false)]; + bool var_891_transpose_y_0 = const()[name = string("op_891_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_885, x = var_884_cast_fp16)[name = string("transpose_212")]; + tensor var_891_cast_fp16 = matmul(transpose_x = var_891_transpose_x_0, transpose_y = var_891_transpose_y_0, x = var_889_cast_fp16, y = v_39_cast_fp16)[name = string("op_891_cast_fp16")]; + tensor var_892 = const()[name = string("op_892"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 768])]; + tensor var_893_cast_fp16 = transpose(perm = var_892, x = var_891_cast_fp16)[name = string("transpose_209")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_893_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138594432)))]; + tensor var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139774144)))]; + tensor linear_29_cast_fp16 = linear(bias = var_898_to_fp16, weight = var_897_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_905_axes_0 = const()[name = string("op_905_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139775744)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139777344)))]; + tensor var_905_cast_fp16 = layer_norm(axes = var_905_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_739_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_905_cast_fp16")]; + tensor var_914_to_fp16 = const()[name = string("op_914_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139778944)))]; + tensor var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144497600)))]; + tensor linear_30_cast_fp16 = linear(bias = var_915_to_fp16, weight = var_914_to_fp16, x = var_905_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_920_to_fp16 = const()[name = string("op_920_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144503808)))]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149222464)))]; + tensor linear_31_cast_fp16 = linear(bias = var_921_to_fp16, weight = var_920_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor k_cache_17_begin_0 = const()[name = string("k_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_17_end_0 = const()[name = string("k_cache_17_end_0"), val = tensor([5, 1, 448, 768])]; + tensor k_cache_17_end_mask_0 = const()[name = string("k_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_17_squeeze_mask_0 = const()[name = string("k_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_17_cast_fp16 = slice_by_index(begin = k_cache_17_begin_0, end = k_cache_17_end_0, end_mask = k_cache_17_end_mask_0, squeeze_mask = k_cache_17_squeeze_mask_0, x = coreml_update_state_30)[name = string("k_cache_17_cast_fp16")]; + tensor v_cache_17_begin_0 = const()[name = string("v_cache_17_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_17_end_0 = const()[name = string("v_cache_17_end_0"), val = tensor([5, 1, 448, 768])]; + tensor v_cache_17_end_mask_0 = const()[name = string("v_cache_17_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_17_squeeze_mask_0 = const()[name = string("v_cache_17_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_17_cast_fp16 = slice_by_index(begin = v_cache_17_begin_0, end = v_cache_17_end_0, end_mask = v_cache_17_end_mask_0, squeeze_mask = v_cache_17_squeeze_mask_0, x = coreml_update_state_31)[name = string("v_cache_17_cast_fp16")]; + tensor k_cache_19_begin_0 = const()[name = string("k_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor k_cache_19_end_0 = const()[name = string("k_cache_19_end_0"), val = tensor([5, 1, 1500, 768])]; + tensor k_cache_19_end_mask_0 = const()[name = string("k_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_19_squeeze_mask_0 = const()[name = string("k_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_19_cast_fp16 = slice_by_index(begin = k_cache_19_begin_0, end = k_cache_19_end_0, end_mask = k_cache_19_end_mask_0, squeeze_mask = k_cache_19_squeeze_mask_0, x = read_state_2)[name = string("k_cache_19_cast_fp16")]; + tensor v_cache_19_begin_0 = const()[name = string("v_cache_19_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor v_cache_19_end_0 = const()[name = string("v_cache_19_end_0"), val = tensor([5, 1, 1500, 768])]; + tensor v_cache_19_end_mask_0 = const()[name = string("v_cache_19_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_19_squeeze_mask_0 = const()[name = string("v_cache_19_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_19_cast_fp16 = slice_by_index(begin = v_cache_19_begin_0, end = v_cache_19_end_0, end_mask = v_cache_19_end_mask_0, squeeze_mask = v_cache_19_squeeze_mask_0, x = read_state_3)[name = string("v_cache_19_cast_fp16")]; + int32 var_944 = const()[name = string("op_944"), val = int32(-1)]; + tensor var_962_axes_0 = const()[name = string("op_962_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149224064)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149225664)))]; + fp16 var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_962_cast_fp16 = layer_norm(axes = var_962_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_962_cast_fp16")]; + tensor var_973_to_fp16 = const()[name = string("op_973_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149227264)))]; + tensor var_974_to_fp16 = const()[name = string("op_974_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150406976)))]; + tensor linear_32_cast_fp16 = linear(bias = var_974_to_fp16, weight = var_973_to_fp16, x = var_962_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_977_to_fp16 = const()[name = string("op_977_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150408576)))]; + tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_977_to_fp16, x = var_962_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor var_981_to_fp16 = const()[name = string("op_981_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151588288)))]; + tensor var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152768000)))]; + tensor linear_34_cast_fp16 = linear(bias = var_982_to_fp16, weight = var_981_to_fp16, x = var_962_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor var_984_shape_cast_fp16 = shape(x = linear_32_cast_fp16)[name = string("op_984_shape_cast_fp16")]; + int32 gather_50_axis_0 = const()[name = string("gather_50_axis_0"), val = int32(0)]; + int32 gather_50_batch_dims_0 = const()[name = string("gather_50_batch_dims_0"), val = int32(0)]; + bool gather_50_validate_indices_0 = const()[name = string("gather_50_validate_indices_0"), val = bool(false)]; + string var_984_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_984_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_50_to_uint16 = const()[name = string("select_50_to_uint16"), val = uint16(1)]; + tensor var_984_shape_cast_fp16_to_uint16 = cast(dtype = var_984_shape_cast_fp16_to_uint16_dtype_0, x = var_984_shape_cast_fp16)[name = string("cast_142")]; + uint16 gather_50_cast_uint16 = gather(axis = gather_50_axis_0, batch_dims = gather_50_batch_dims_0, indices = select_50_to_uint16, validate_indices = gather_50_validate_indices_0, x = var_984_shape_cast_fp16_to_uint16)[name = string("gather_50_cast_uint16")]; + string gather_50_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_50_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_50_cast_uint16_to_int32 = cast(dtype = gather_50_cast_uint16_to_int32_dtype_0, x = gather_50_cast_uint16)[name = string("cast_141")]; + int32 end_step_11 = add(x = offset, y = gather_50_cast_uint16_to_int32)[name = string("end_step_11")]; + tensor expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor([0])]; + tensor expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor([0])]; + tensor expand_dims_67_axes_0 = const()[name = string("expand_dims_67_axes_0"), val = tensor([0])]; + tensor expand_dims_67 = expand_dims(axes = expand_dims_67_axes_0, x = end_step_11)[name = string("expand_dims_67")]; + tensor concat_92_values0_0 = const()[name = string("concat_92_values0_0"), val = tensor([4])]; + int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; + bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; + tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (concat_92_values0_0, expand_dims_64, expand_dims_1, expand_dims_66))[name = string("concat_92")]; + tensor concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = tensor([0])]; + tensor concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor([0])]; + tensor concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor([0])]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, expand_dims_67, concat_93_values3_0))[name = string("concat_93")]; + tensor k_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = k_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = k_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_5_stride_0, update = linear_33_cast_fp16, x = coreml_update_state_30)[name = string("k_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_5_cast_fp16, input = k_cache1)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = k_cache1)[name = string("coreml_update_state_32")]; + tensor v_cache1_internal_tensor_assign_5_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_5_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_92, begin_mask = v_cache1_internal_tensor_assign_5_begin_mask_0, end = concat_93, end_mask = v_cache1_internal_tensor_assign_5_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_5_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_5_stride_0, update = linear_34_cast_fp16, x = coreml_update_state_31)[name = string("v_cache1_internal_tensor_assign_5_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_5_cast_fp16, input = v_cache1)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = v_cache1)[name = string("coreml_update_state_33")]; + int32 concat_98_values0_0 = const()[name = string("concat_98_values0_0"), val = int32(1)]; + int32 concat_98_values2_0 = const()[name = string("concat_98_values2_0"), val = int32(768)]; + int32 concat_98_axis_0 = const()[name = string("concat_98_axis_0"), val = int32(0)]; + bool concat_98_interleave_0 = const()[name = string("concat_98_interleave_0"), val = bool(false)]; + tensor concat_98 = concat(axis = concat_98_axis_0, interleave = concat_98_interleave_0, values = (concat_98_values0_0, end_step_11, concat_98_values2_0))[name = string("concat_98")]; + tensor var_1000_begin_0 = const()[name = string("op_1000_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1000_end_mask_0 = const()[name = string("op_1000_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = concat_98, end_mask = var_1000_end_mask_0, x = k_cache_17_cast_fp16)[name = string("op_1000_cast_fp16")]; + tensor var_1003_begin_0 = const()[name = string("op_1003_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1003_end_mask_0 = const()[name = string("op_1003_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = concat_98, end_mask = var_1003_end_mask_0, x = v_cache_17_cast_fp16)[name = string("op_1003_cast_fp16")]; + tensor concat_100x = const()[name = string("concat_100x"), val = tensor([1, -1, 12, 64])]; + tensor var_1013_cast_fp16 = reshape(shape = concat_100x, x = linear_32_cast_fp16)[name = string("op_1013_cast_fp16")]; + tensor const_76_to_fp16 = const()[name = string("const_76_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_1013_cast_fp16, y = const_76_to_fp16)[name = string("q_35_cast_fp16")]; + tensor concat_101x = const()[name = string("concat_101x"), val = tensor([1, -1, 12, 64])]; + tensor var_1020_cast_fp16 = reshape(shape = concat_101x, x = var_1000_cast_fp16)[name = string("op_1020_cast_fp16")]; + tensor const_77_to_fp16 = const()[name = string("const_77_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_45_cast_fp16 = mul(x = var_1020_cast_fp16, y = const_77_to_fp16)[name = string("k_45_cast_fp16")]; + tensor concat_102x = const()[name = string("concat_102x"), val = tensor([1, -1, 12, 64])]; + tensor var_1027_cast_fp16 = reshape(shape = concat_102x, x = var_1003_cast_fp16)[name = string("op_1027_cast_fp16")]; + tensor var_1028 = const()[name = string("op_1028"), val = tensor([0, 2, 1, 3])]; + bool qk_25_transpose_x_0 = const()[name = string("qk_25_transpose_x_0"), val = bool(false)]; + bool qk_25_transpose_y_0 = const()[name = string("qk_25_transpose_y_0"), val = bool(false)]; + tensor transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = k_45_cast_fp16)[name = string("transpose_206")]; + tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = q_35_cast_fp16)[name = string("transpose_207")]; + tensor qk_25_cast_fp16 = matmul(transpose_x = qk_25_transpose_x_0, transpose_y = qk_25_transpose_y_0, x = transpose_113, y = transpose_114)[name = string("qk_25_cast_fp16")]; + int32 concat_103_values1_0 = const()[name = string("concat_103_values1_0"), val = int32(448)]; + int32 concat_103_axis_0 = const()[name = string("concat_103_axis_0"), val = int32(0)]; + bool concat_103_interleave_0 = const()[name = string("concat_103_interleave_0"), val = bool(false)]; + tensor concat_103 = concat(axis = concat_103_axis_0, interleave = concat_103_interleave_0, values = (gather_50_cast_uint16_to_int32, concat_103_values1_0))[name = string("concat_103")]; + tensor var_1031_begin_0 = const()[name = string("op_1031_begin_0"), val = tensor([0, 0])]; + tensor var_1031_end_mask_0 = const()[name = string("op_1031_end_mask_0"), val = tensor([false, true])]; + tensor var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = concat_103, end_mask = var_1031_end_mask_0, x = mask_to_fp16)[name = string("op_1031_cast_fp16")]; + int32 concat_104_values0_0 = const()[name = string("concat_104_values0_0"), val = int32(0)]; + int32 concat_104_axis_0 = const()[name = string("concat_104_axis_0"), val = int32(0)]; + bool concat_104_interleave_0 = const()[name = string("concat_104_interleave_0"), val = bool(false)]; + tensor concat_104 = concat(axis = concat_104_axis_0, interleave = concat_104_interleave_0, values = (concat_104_values0_0, gather_50_cast_uint16_to_int32))[name = string("concat_104")]; + tensor var_1032_begin_0 = const()[name = string("op_1032_begin_0"), val = tensor([0, 0])]; + tensor var_1032_end_mask_0 = const()[name = string("op_1032_end_mask_0"), val = tensor([true, false])]; + tensor var_1032_cast_fp16 = slice_by_index(begin = var_1032_begin_0, end = concat_104, end_mask = var_1032_end_mask_0, x = var_1031_cast_fp16)[name = string("op_1032_cast_fp16")]; + tensor qk_27_cast_fp16 = add(x = qk_25_cast_fp16, y = var_1032_cast_fp16)[name = string("qk_27_cast_fp16")]; + tensor var_1035_cast_fp16 = softmax(axis = var_944, x = qk_27_cast_fp16)[name = string("op_1035_cast_fp16")]; + bool var_1037_transpose_x_0 = const()[name = string("op_1037_transpose_x_0"), val = bool(false)]; + bool var_1037_transpose_y_0 = const()[name = string("op_1037_transpose_y_0"), val = bool(false)]; + tensor v_45_cast_fp16 = transpose(perm = var_1028, x = var_1027_cast_fp16)[name = string("transpose_208")]; + tensor var_1037_cast_fp16 = matmul(transpose_x = var_1037_transpose_x_0, transpose_y = var_1037_transpose_y_0, x = var_1035_cast_fp16, y = v_45_cast_fp16)[name = string("op_1037_cast_fp16")]; + tensor var_1038 = const()[name = string("op_1038"), val = tensor([0, 2, 1, 3])]; + tensor concat_105x = const()[name = string("concat_105x"), val = tensor([1, -1, 768])]; + tensor var_1039_cast_fp16 = transpose(perm = var_1038, x = var_1037_cast_fp16)[name = string("transpose_205")]; + tensor x_79_cast_fp16 = reshape(shape = concat_105x, x = var_1039_cast_fp16)[name = string("x_79_cast_fp16")]; + tensor var_1043_to_fp16 = const()[name = string("op_1043_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152769600)))]; + tensor var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153949312)))]; + tensor linear_35_cast_fp16 = linear(bias = var_1044_to_fp16, weight = var_1043_to_fp16, x = x_79_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_81_cast_fp16 = add(x = x_75_cast_fp16, y = linear_35_cast_fp16)[name = string("x_81_cast_fp16")]; + tensor var_1051_axes_0 = const()[name = string("op_1051_axes_0"), val = tensor([-1])]; + tensor blocks_4_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153950912)))]; + tensor blocks_4_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153952512)))]; + tensor var_1051_cast_fp16 = layer_norm(axes = var_1051_axes_0, beta = blocks_4_cross_attn_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_cross_attn_ln_weight_to_fp16, x = x_81_cast_fp16)[name = string("op_1051_cast_fp16")]; + tensor var_1060_to_fp16 = const()[name = string("op_1060_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153954112)))]; + tensor var_1061_to_fp16 = const()[name = string("op_1061_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155133824)))]; + tensor linear_36_cast_fp16 = linear(bias = var_1061_to_fp16, weight = var_1060_to_fp16, x = var_1051_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor concat_106 = const()[name = string("concat_106"), val = tensor([0, 0, 0])]; + tensor concat_107 = const()[name = string("concat_107"), val = tensor([0, 1500, 0])]; + tensor k_47_internal_tensor_assign_1_stride_0 = const()[name = string("k_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_106, begin_mask = k_47_internal_tensor_assign_1_begin_mask_0, end = concat_107, end_mask = k_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_47_internal_tensor_assign_1_squeeze_mask_0, stride = k_47_internal_tensor_assign_1_stride_0, update = k_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("k_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_108 = const()[name = string("concat_108"), val = tensor([0, 0, 0])]; + tensor concat_109 = const()[name = string("concat_109"), val = tensor([0, 1500, 0])]; + tensor v_47_internal_tensor_assign_1_stride_0 = const()[name = string("v_47_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_47_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_47_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_47_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_47_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_108, begin_mask = v_47_internal_tensor_assign_1_begin_mask_0, end = concat_109, end_mask = v_47_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_47_internal_tensor_assign_1_squeeze_mask_0, stride = v_47_internal_tensor_assign_1_stride_0, update = v_cache_19_cast_fp16, x = k_7_to_fp16)[name = string("v_47_internal_tensor_assign_1_cast_fp16")]; + tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, -1, 12, 64])]; + tensor var_1081_cast_fp16 = reshape(shape = concat_110x, x = linear_36_cast_fp16)[name = string("op_1081_cast_fp16")]; + tensor const_78_to_fp16 = const()[name = string("const_78_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1081_cast_fp16, y = const_78_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1087 = const()[name = string("op_1087"), val = tensor([1, 1500, 12, -1])]; + tensor var_1088_cast_fp16 = reshape(shape = var_1087, x = k_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1088_cast_fp16")]; + tensor const_79_to_fp16 = const()[name = string("const_79_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_49_cast_fp16 = mul(x = var_1088_cast_fp16, y = const_79_to_fp16)[name = string("k_49_cast_fp16")]; + tensor var_1094 = const()[name = string("op_1094"), val = tensor([1, 1500, 12, -1])]; + tensor var_1095_cast_fp16 = reshape(shape = var_1094, x = v_47_internal_tensor_assign_1_cast_fp16)[name = string("op_1095_cast_fp16")]; + tensor var_1096 = const()[name = string("op_1096"), val = tensor([0, 2, 1, 3])]; + bool qk_29_transpose_x_0 = const()[name = string("qk_29_transpose_x_0"), val = bool(false)]; + bool qk_29_transpose_y_0 = const()[name = string("qk_29_transpose_y_0"), val = bool(false)]; + tensor transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = k_49_cast_fp16)[name = string("transpose_202")]; + tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = q_39_cast_fp16)[name = string("transpose_203")]; + tensor qk_29_cast_fp16 = matmul(transpose_x = qk_29_transpose_x_0, transpose_y = qk_29_transpose_y_0, x = transpose_115, y = transpose_116)[name = string("qk_29_cast_fp16")]; + tensor var_1100_cast_fp16 = softmax(axis = var_944, x = qk_29_cast_fp16)[name = string("op_1100_cast_fp16")]; + bool var_1102_transpose_x_0 = const()[name = string("op_1102_transpose_x_0"), val = bool(false)]; + bool var_1102_transpose_y_0 = const()[name = string("op_1102_transpose_y_0"), val = bool(false)]; + tensor v_49_cast_fp16 = transpose(perm = var_1096, x = var_1095_cast_fp16)[name = string("transpose_204")]; + tensor var_1102_cast_fp16 = matmul(transpose_x = var_1102_transpose_x_0, transpose_y = var_1102_transpose_y_0, x = var_1100_cast_fp16, y = v_49_cast_fp16)[name = string("op_1102_cast_fp16")]; + tensor var_1103 = const()[name = string("op_1103"), val = tensor([0, 2, 1, 3])]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, -1, 768])]; + tensor var_1104_cast_fp16 = transpose(perm = var_1103, x = var_1102_cast_fp16)[name = string("transpose_201")]; + tensor x_85_cast_fp16 = reshape(shape = concat_111x, x = var_1104_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155135424)))]; + tensor var_1109_to_fp16 = const()[name = string("op_1109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156315136)))]; + tensor linear_37_cast_fp16 = linear(bias = var_1109_to_fp16, weight = var_1108_to_fp16, x = x_85_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor x_87_cast_fp16 = add(x = x_81_cast_fp16, y = linear_37_cast_fp16)[name = string("x_87_cast_fp16")]; + tensor var_1116_axes_0 = const()[name = string("op_1116_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156316736)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156318336)))]; + tensor var_1116_cast_fp16 = layer_norm(axes = var_1116_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_950_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_87_cast_fp16)[name = string("op_1116_cast_fp16")]; + tensor var_1125_to_fp16 = const()[name = string("op_1125_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156319936)))]; + tensor var_1126_to_fp16 = const()[name = string("op_1126_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161038592)))]; + tensor linear_38_cast_fp16 = linear(bias = var_1126_to_fp16, weight = var_1125_to_fp16, x = var_1116_cast_fp16)[name = string("linear_38_cast_fp16")]; + string x_91_mode_0 = const()[name = string("x_91_mode_0"), val = string("EXACT")]; + tensor x_91_cast_fp16 = gelu(mode = x_91_mode_0, x = linear_38_cast_fp16)[name = string("x_91_cast_fp16")]; + tensor var_1131_to_fp16 = const()[name = string("op_1131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161044800)))]; + tensor var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165763456)))]; + tensor linear_39_cast_fp16 = linear(bias = var_1132_to_fp16, weight = var_1131_to_fp16, x = x_91_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_93_cast_fp16 = add(x = x_87_cast_fp16, y = linear_39_cast_fp16)[name = string("x_93_cast_fp16")]; + tensor k_cache_21_begin_0 = const()[name = string("k_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_21_end_0 = const()[name = string("k_cache_21_end_0"), val = tensor([6, 1, 448, 768])]; + tensor k_cache_21_end_mask_0 = const()[name = string("k_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_21_squeeze_mask_0 = const()[name = string("k_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_21_cast_fp16 = slice_by_index(begin = k_cache_21_begin_0, end = k_cache_21_end_0, end_mask = k_cache_21_end_mask_0, squeeze_mask = k_cache_21_squeeze_mask_0, x = coreml_update_state_32)[name = string("k_cache_21_cast_fp16")]; + tensor v_cache_21_begin_0 = const()[name = string("v_cache_21_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_21_end_0 = const()[name = string("v_cache_21_end_0"), val = tensor([6, 1, 448, 768])]; + tensor v_cache_21_end_mask_0 = const()[name = string("v_cache_21_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_21_squeeze_mask_0 = const()[name = string("v_cache_21_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_21_cast_fp16 = slice_by_index(begin = v_cache_21_begin_0, end = v_cache_21_end_0, end_mask = v_cache_21_end_mask_0, squeeze_mask = v_cache_21_squeeze_mask_0, x = coreml_update_state_33)[name = string("v_cache_21_cast_fp16")]; + tensor k_cache_23_begin_0 = const()[name = string("k_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor k_cache_23_end_0 = const()[name = string("k_cache_23_end_0"), val = tensor([6, 1, 1500, 768])]; + tensor k_cache_23_end_mask_0 = const()[name = string("k_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_23_squeeze_mask_0 = const()[name = string("k_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_23_cast_fp16 = slice_by_index(begin = k_cache_23_begin_0, end = k_cache_23_end_0, end_mask = k_cache_23_end_mask_0, squeeze_mask = k_cache_23_squeeze_mask_0, x = read_state_2)[name = string("k_cache_23_cast_fp16")]; + tensor v_cache_23_begin_0 = const()[name = string("v_cache_23_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor v_cache_23_end_0 = const()[name = string("v_cache_23_end_0"), val = tensor([6, 1, 1500, 768])]; + tensor v_cache_23_end_mask_0 = const()[name = string("v_cache_23_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_23_squeeze_mask_0 = const()[name = string("v_cache_23_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_23_cast_fp16 = slice_by_index(begin = v_cache_23_begin_0, end = v_cache_23_end_0, end_mask = v_cache_23_end_mask_0, squeeze_mask = v_cache_23_squeeze_mask_0, x = read_state_3)[name = string("v_cache_23_cast_fp16")]; + int32 var_1155 = const()[name = string("op_1155"), val = int32(-1)]; + tensor var_1173_axes_0 = const()[name = string("op_1173_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165765056)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165766656)))]; + fp16 var_1161_to_fp16 = const()[name = string("op_1161_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1173_cast_fp16 = layer_norm(axes = var_1173_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_93_cast_fp16)[name = string("op_1173_cast_fp16")]; + tensor var_1184_to_fp16 = const()[name = string("op_1184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165768256)))]; + tensor var_1185_to_fp16 = const()[name = string("op_1185_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166947968)))]; + tensor linear_40_cast_fp16 = linear(bias = var_1185_to_fp16, weight = var_1184_to_fp16, x = var_1173_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor var_1188_to_fp16 = const()[name = string("op_1188_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166949568)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1188_to_fp16, x = var_1173_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor var_1192_to_fp16 = const()[name = string("op_1192_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168129280)))]; + tensor var_1193_to_fp16 = const()[name = string("op_1193_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169308992)))]; + tensor linear_42_cast_fp16 = linear(bias = var_1193_to_fp16, weight = var_1192_to_fp16, x = var_1173_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_1195_shape_cast_fp16 = shape(x = linear_40_cast_fp16)[name = string("op_1195_shape_cast_fp16")]; + int32 gather_62_axis_0 = const()[name = string("gather_62_axis_0"), val = int32(0)]; + int32 gather_62_batch_dims_0 = const()[name = string("gather_62_batch_dims_0"), val = int32(0)]; + bool gather_62_validate_indices_0 = const()[name = string("gather_62_validate_indices_0"), val = bool(false)]; + string var_1195_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1195_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_62_to_uint16 = const()[name = string("select_62_to_uint16"), val = uint16(1)]; + tensor var_1195_shape_cast_fp16_to_uint16 = cast(dtype = var_1195_shape_cast_fp16_to_uint16_dtype_0, x = var_1195_shape_cast_fp16)[name = string("cast_140")]; + uint16 gather_62_cast_uint16 = gather(axis = gather_62_axis_0, batch_dims = gather_62_batch_dims_0, indices = select_62_to_uint16, validate_indices = gather_62_validate_indices_0, x = var_1195_shape_cast_fp16_to_uint16)[name = string("gather_62_cast_uint16")]; + string gather_62_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_62_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_62_cast_uint16_to_int32 = cast(dtype = gather_62_cast_uint16_to_int32_dtype_0, x = gather_62_cast_uint16)[name = string("cast_139")]; + int32 end_step_13 = add(x = offset, y = gather_62_cast_uint16_to_int32)[name = string("end_step_13")]; + tensor expand_dims_80 = const()[name = string("expand_dims_80"), val = tensor([0])]; + tensor expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor([0])]; + tensor expand_dims_83_axes_0 = const()[name = string("expand_dims_83_axes_0"), val = tensor([0])]; + tensor expand_dims_83 = expand_dims(axes = expand_dims_83_axes_0, x = end_step_13)[name = string("expand_dims_83")]; + tensor concat_114_values0_0 = const()[name = string("concat_114_values0_0"), val = tensor([5])]; + int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; + bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; + tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (concat_114_values0_0, expand_dims_80, expand_dims_1, expand_dims_82))[name = string("concat_114")]; + tensor concat_115_values0_0 = const()[name = string("concat_115_values0_0"), val = tensor([0])]; + tensor concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor([0])]; + tensor concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor([0])]; + int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)]; + bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)]; + tensor concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (concat_115_values0_0, concat_115_values1_0, expand_dims_83, concat_115_values3_0))[name = string("concat_115")]; + tensor k_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = k_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = k_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_6_stride_0, update = linear_41_cast_fp16, x = coreml_update_state_32)[name = string("k_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_6_cast_fp16, input = k_cache1)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = k_cache1)[name = string("coreml_update_state_34")]; + tensor v_cache1_internal_tensor_assign_6_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_6_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_114, begin_mask = v_cache1_internal_tensor_assign_6_begin_mask_0, end = concat_115, end_mask = v_cache1_internal_tensor_assign_6_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_6_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_6_stride_0, update = linear_42_cast_fp16, x = coreml_update_state_33)[name = string("v_cache1_internal_tensor_assign_6_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_6_cast_fp16, input = v_cache1)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = v_cache1)[name = string("coreml_update_state_35")]; + int32 concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = int32(1)]; + int32 concat_120_values2_0 = const()[name = string("concat_120_values2_0"), val = int32(768)]; + int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; + bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; + tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, end_step_13, concat_120_values2_0))[name = string("concat_120")]; + tensor var_1211_begin_0 = const()[name = string("op_1211_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1211_end_mask_0 = const()[name = string("op_1211_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1211_cast_fp16 = slice_by_index(begin = var_1211_begin_0, end = concat_120, end_mask = var_1211_end_mask_0, x = k_cache_21_cast_fp16)[name = string("op_1211_cast_fp16")]; + tensor var_1214_begin_0 = const()[name = string("op_1214_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1214_end_mask_0 = const()[name = string("op_1214_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = concat_120, end_mask = var_1214_end_mask_0, x = v_cache_21_cast_fp16)[name = string("op_1214_cast_fp16")]; + tensor concat_122x = const()[name = string("concat_122x"), val = tensor([1, -1, 12, 64])]; + tensor var_1224_cast_fp16 = reshape(shape = concat_122x, x = linear_40_cast_fp16)[name = string("op_1224_cast_fp16")]; + tensor const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1224_cast_fp16, y = const_80_to_fp16)[name = string("q_43_cast_fp16")]; + tensor concat_123x = const()[name = string("concat_123x"), val = tensor([1, -1, 12, 64])]; + tensor var_1231_cast_fp16 = reshape(shape = concat_123x, x = var_1211_cast_fp16)[name = string("op_1231_cast_fp16")]; + tensor const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_55_cast_fp16 = mul(x = var_1231_cast_fp16, y = const_81_to_fp16)[name = string("k_55_cast_fp16")]; + tensor concat_124x = const()[name = string("concat_124x"), val = tensor([1, -1, 12, 64])]; + tensor var_1238_cast_fp16 = reshape(shape = concat_124x, x = var_1214_cast_fp16)[name = string("op_1238_cast_fp16")]; + tensor var_1239 = const()[name = string("op_1239"), val = tensor([0, 2, 1, 3])]; + bool qk_31_transpose_x_0 = const()[name = string("qk_31_transpose_x_0"), val = bool(false)]; + bool qk_31_transpose_y_0 = const()[name = string("qk_31_transpose_y_0"), val = bool(false)]; + tensor transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = k_55_cast_fp16)[name = string("transpose_198")]; + tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = q_43_cast_fp16)[name = string("transpose_199")]; + tensor qk_31_cast_fp16 = matmul(transpose_x = qk_31_transpose_x_0, transpose_y = qk_31_transpose_y_0, x = transpose_117, y = transpose_118)[name = string("qk_31_cast_fp16")]; + int32 concat_125_values1_0 = const()[name = string("concat_125_values1_0"), val = int32(448)]; + int32 concat_125_axis_0 = const()[name = string("concat_125_axis_0"), val = int32(0)]; + bool concat_125_interleave_0 = const()[name = string("concat_125_interleave_0"), val = bool(false)]; + tensor concat_125 = concat(axis = concat_125_axis_0, interleave = concat_125_interleave_0, values = (gather_62_cast_uint16_to_int32, concat_125_values1_0))[name = string("concat_125")]; + tensor var_1242_begin_0 = const()[name = string("op_1242_begin_0"), val = tensor([0, 0])]; + tensor var_1242_end_mask_0 = const()[name = string("op_1242_end_mask_0"), val = tensor([false, true])]; + tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = concat_125, end_mask = var_1242_end_mask_0, x = mask_to_fp16)[name = string("op_1242_cast_fp16")]; + int32 concat_126_values0_0 = const()[name = string("concat_126_values0_0"), val = int32(0)]; + int32 concat_126_axis_0 = const()[name = string("concat_126_axis_0"), val = int32(0)]; + bool concat_126_interleave_0 = const()[name = string("concat_126_interleave_0"), val = bool(false)]; + tensor concat_126 = concat(axis = concat_126_axis_0, interleave = concat_126_interleave_0, values = (concat_126_values0_0, gather_62_cast_uint16_to_int32))[name = string("concat_126")]; + tensor var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor([0, 0])]; + tensor var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor([true, false])]; + tensor var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = concat_126, end_mask = var_1243_end_mask_0, x = var_1242_cast_fp16)[name = string("op_1243_cast_fp16")]; + tensor qk_33_cast_fp16 = add(x = qk_31_cast_fp16, y = var_1243_cast_fp16)[name = string("qk_33_cast_fp16")]; + tensor var_1246_cast_fp16 = softmax(axis = var_1155, x = qk_33_cast_fp16)[name = string("op_1246_cast_fp16")]; + bool var_1248_transpose_x_0 = const()[name = string("op_1248_transpose_x_0"), val = bool(false)]; + bool var_1248_transpose_y_0 = const()[name = string("op_1248_transpose_y_0"), val = bool(false)]; + tensor v_55_cast_fp16 = transpose(perm = var_1239, x = var_1238_cast_fp16)[name = string("transpose_200")]; + tensor var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = var_1246_cast_fp16, y = v_55_cast_fp16)[name = string("op_1248_cast_fp16")]; + tensor var_1249 = const()[name = string("op_1249"), val = tensor([0, 2, 1, 3])]; + tensor concat_127x = const()[name = string("concat_127x"), val = tensor([1, -1, 768])]; + tensor var_1250_cast_fp16 = transpose(perm = var_1249, x = var_1248_cast_fp16)[name = string("transpose_197")]; + tensor x_97_cast_fp16 = reshape(shape = concat_127x, x = var_1250_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_1254_to_fp16 = const()[name = string("op_1254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169310592)))]; + tensor var_1255_to_fp16 = const()[name = string("op_1255_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170490304)))]; + tensor linear_43_cast_fp16 = linear(bias = var_1255_to_fp16, weight = var_1254_to_fp16, x = x_97_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor x_99_cast_fp16 = add(x = x_93_cast_fp16, y = linear_43_cast_fp16)[name = string("x_99_cast_fp16")]; + tensor var_1262_axes_0 = const()[name = string("op_1262_axes_0"), val = tensor([-1])]; + tensor blocks_5_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170491904)))]; + tensor blocks_5_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170493504)))]; + tensor var_1262_cast_fp16 = layer_norm(axes = var_1262_axes_0, beta = blocks_5_cross_attn_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_cross_attn_ln_weight_to_fp16, x = x_99_cast_fp16)[name = string("op_1262_cast_fp16")]; + tensor var_1271_to_fp16 = const()[name = string("op_1271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170495104)))]; + tensor var_1272_to_fp16 = const()[name = string("op_1272_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171674816)))]; + tensor linear_44_cast_fp16 = linear(bias = var_1272_to_fp16, weight = var_1271_to_fp16, x = var_1262_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_128 = const()[name = string("concat_128"), val = tensor([0, 0, 0])]; + tensor concat_129 = const()[name = string("concat_129"), val = tensor([0, 1500, 0])]; + tensor k_57_internal_tensor_assign_1_stride_0 = const()[name = string("k_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_128, begin_mask = k_57_internal_tensor_assign_1_begin_mask_0, end = concat_129, end_mask = k_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_57_internal_tensor_assign_1_squeeze_mask_0, stride = k_57_internal_tensor_assign_1_stride_0, update = k_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("k_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_130 = const()[name = string("concat_130"), val = tensor([0, 0, 0])]; + tensor concat_131 = const()[name = string("concat_131"), val = tensor([0, 1500, 0])]; + tensor v_57_internal_tensor_assign_1_stride_0 = const()[name = string("v_57_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_57_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_57_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_57_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_57_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_130, begin_mask = v_57_internal_tensor_assign_1_begin_mask_0, end = concat_131, end_mask = v_57_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_57_internal_tensor_assign_1_squeeze_mask_0, stride = v_57_internal_tensor_assign_1_stride_0, update = v_cache_23_cast_fp16, x = k_7_to_fp16)[name = string("v_57_internal_tensor_assign_1_cast_fp16")]; + tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 12, 64])]; + tensor var_1292_cast_fp16 = reshape(shape = concat_132x, x = linear_44_cast_fp16)[name = string("op_1292_cast_fp16")]; + tensor const_82_to_fp16 = const()[name = string("const_82_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_47_cast_fp16 = mul(x = var_1292_cast_fp16, y = const_82_to_fp16)[name = string("q_47_cast_fp16")]; + tensor var_1298 = const()[name = string("op_1298"), val = tensor([1, 1500, 12, -1])]; + tensor var_1299_cast_fp16 = reshape(shape = var_1298, x = k_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1299_cast_fp16")]; + tensor const_83_to_fp16 = const()[name = string("const_83_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_59_cast_fp16 = mul(x = var_1299_cast_fp16, y = const_83_to_fp16)[name = string("k_59_cast_fp16")]; + tensor var_1305 = const()[name = string("op_1305"), val = tensor([1, 1500, 12, -1])]; + tensor var_1306_cast_fp16 = reshape(shape = var_1305, x = v_57_internal_tensor_assign_1_cast_fp16)[name = string("op_1306_cast_fp16")]; + tensor var_1307 = const()[name = string("op_1307"), val = tensor([0, 2, 1, 3])]; + bool qk_35_transpose_x_0 = const()[name = string("qk_35_transpose_x_0"), val = bool(false)]; + bool qk_35_transpose_y_0 = const()[name = string("qk_35_transpose_y_0"), val = bool(false)]; + tensor transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = k_59_cast_fp16)[name = string("transpose_194")]; + tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = q_47_cast_fp16)[name = string("transpose_195")]; + tensor qk_35_cast_fp16 = matmul(transpose_x = qk_35_transpose_x_0, transpose_y = qk_35_transpose_y_0, x = transpose_119, y = transpose_120)[name = string("qk_35_cast_fp16")]; + tensor var_1311_cast_fp16 = softmax(axis = var_1155, x = qk_35_cast_fp16)[name = string("op_1311_cast_fp16")]; + bool var_1313_transpose_x_0 = const()[name = string("op_1313_transpose_x_0"), val = bool(false)]; + bool var_1313_transpose_y_0 = const()[name = string("op_1313_transpose_y_0"), val = bool(false)]; + tensor v_59_cast_fp16 = transpose(perm = var_1307, x = var_1306_cast_fp16)[name = string("transpose_196")]; + tensor var_1313_cast_fp16 = matmul(transpose_x = var_1313_transpose_x_0, transpose_y = var_1313_transpose_y_0, x = var_1311_cast_fp16, y = v_59_cast_fp16)[name = string("op_1313_cast_fp16")]; + tensor var_1314 = const()[name = string("op_1314"), val = tensor([0, 2, 1, 3])]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 768])]; + tensor var_1315_cast_fp16 = transpose(perm = var_1314, x = var_1313_cast_fp16)[name = string("transpose_193")]; + tensor x_103_cast_fp16 = reshape(shape = concat_133x, x = var_1315_cast_fp16)[name = string("x_103_cast_fp16")]; + tensor var_1319_to_fp16 = const()[name = string("op_1319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171676416)))]; + tensor var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172856128)))]; + tensor linear_45_cast_fp16 = linear(bias = var_1320_to_fp16, weight = var_1319_to_fp16, x = x_103_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_105_cast_fp16 = add(x = x_99_cast_fp16, y = linear_45_cast_fp16)[name = string("x_105_cast_fp16")]; + tensor var_1327_axes_0 = const()[name = string("op_1327_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172857728)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172859328)))]; + tensor var_1327_cast_fp16 = layer_norm(axes = var_1327_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_1161_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_105_cast_fp16)[name = string("op_1327_cast_fp16")]; + tensor var_1336_to_fp16 = const()[name = string("op_1336_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172860928)))]; + tensor var_1337_to_fp16 = const()[name = string("op_1337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177579584)))]; + tensor linear_46_cast_fp16 = linear(bias = var_1337_to_fp16, weight = var_1336_to_fp16, x = var_1327_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_109_mode_0 = const()[name = string("x_109_mode_0"), val = string("EXACT")]; + tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = linear_46_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1342_to_fp16 = const()[name = string("op_1342_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177585792)))]; + tensor var_1343_to_fp16 = const()[name = string("op_1343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182304448)))]; + tensor linear_47_cast_fp16 = linear(bias = var_1343_to_fp16, weight = var_1342_to_fp16, x = x_109_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_111_cast_fp16 = add(x = x_105_cast_fp16, y = linear_47_cast_fp16)[name = string("x_111_cast_fp16")]; + tensor k_cache_25_begin_0 = const()[name = string("k_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_25_end_0 = const()[name = string("k_cache_25_end_0"), val = tensor([7, 1, 448, 768])]; + tensor k_cache_25_end_mask_0 = const()[name = string("k_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_25_squeeze_mask_0 = const()[name = string("k_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_25_cast_fp16 = slice_by_index(begin = k_cache_25_begin_0, end = k_cache_25_end_0, end_mask = k_cache_25_end_mask_0, squeeze_mask = k_cache_25_squeeze_mask_0, x = coreml_update_state_34)[name = string("k_cache_25_cast_fp16")]; + tensor v_cache_25_begin_0 = const()[name = string("v_cache_25_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_25_end_0 = const()[name = string("v_cache_25_end_0"), val = tensor([7, 1, 448, 768])]; + tensor v_cache_25_end_mask_0 = const()[name = string("v_cache_25_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_25_squeeze_mask_0 = const()[name = string("v_cache_25_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_25_cast_fp16 = slice_by_index(begin = v_cache_25_begin_0, end = v_cache_25_end_0, end_mask = v_cache_25_end_mask_0, squeeze_mask = v_cache_25_squeeze_mask_0, x = coreml_update_state_35)[name = string("v_cache_25_cast_fp16")]; + tensor k_cache_27_begin_0 = const()[name = string("k_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor k_cache_27_end_0 = const()[name = string("k_cache_27_end_0"), val = tensor([7, 1, 1500, 768])]; + tensor k_cache_27_end_mask_0 = const()[name = string("k_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_27_squeeze_mask_0 = const()[name = string("k_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_27_cast_fp16 = slice_by_index(begin = k_cache_27_begin_0, end = k_cache_27_end_0, end_mask = k_cache_27_end_mask_0, squeeze_mask = k_cache_27_squeeze_mask_0, x = read_state_2)[name = string("k_cache_27_cast_fp16")]; + tensor v_cache_27_begin_0 = const()[name = string("v_cache_27_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor v_cache_27_end_0 = const()[name = string("v_cache_27_end_0"), val = tensor([7, 1, 1500, 768])]; + tensor v_cache_27_end_mask_0 = const()[name = string("v_cache_27_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_27_squeeze_mask_0 = const()[name = string("v_cache_27_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_27_cast_fp16 = slice_by_index(begin = v_cache_27_begin_0, end = v_cache_27_end_0, end_mask = v_cache_27_end_mask_0, squeeze_mask = v_cache_27_squeeze_mask_0, x = read_state_3)[name = string("v_cache_27_cast_fp16")]; + int32 var_1366 = const()[name = string("op_1366"), val = int32(-1)]; + tensor var_1384_axes_0 = const()[name = string("op_1384_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182306048)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182307648)))]; + fp16 var_1372_to_fp16 = const()[name = string("op_1372_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1384_cast_fp16 = layer_norm(axes = var_1384_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_111_cast_fp16)[name = string("op_1384_cast_fp16")]; + tensor var_1395_to_fp16 = const()[name = string("op_1395_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182309248)))]; + tensor var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183488960)))]; + tensor linear_48_cast_fp16 = linear(bias = var_1396_to_fp16, weight = var_1395_to_fp16, x = var_1384_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183490560)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1399_to_fp16, x = var_1384_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_1403_to_fp16 = const()[name = string("op_1403_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184670272)))]; + tensor var_1404_to_fp16 = const()[name = string("op_1404_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185849984)))]; + tensor linear_50_cast_fp16 = linear(bias = var_1404_to_fp16, weight = var_1403_to_fp16, x = var_1384_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_1406_shape_cast_fp16 = shape(x = linear_48_cast_fp16)[name = string("op_1406_shape_cast_fp16")]; + int32 gather_74_axis_0 = const()[name = string("gather_74_axis_0"), val = int32(0)]; + int32 gather_74_batch_dims_0 = const()[name = string("gather_74_batch_dims_0"), val = int32(0)]; + bool gather_74_validate_indices_0 = const()[name = string("gather_74_validate_indices_0"), val = bool(false)]; + string var_1406_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1406_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_74_to_uint16 = const()[name = string("select_74_to_uint16"), val = uint16(1)]; + tensor var_1406_shape_cast_fp16_to_uint16 = cast(dtype = var_1406_shape_cast_fp16_to_uint16_dtype_0, x = var_1406_shape_cast_fp16)[name = string("cast_138")]; + uint16 gather_74_cast_uint16 = gather(axis = gather_74_axis_0, batch_dims = gather_74_batch_dims_0, indices = select_74_to_uint16, validate_indices = gather_74_validate_indices_0, x = var_1406_shape_cast_fp16_to_uint16)[name = string("gather_74_cast_uint16")]; + string gather_74_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_74_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74_cast_uint16_to_int32 = cast(dtype = gather_74_cast_uint16_to_int32_dtype_0, x = gather_74_cast_uint16)[name = string("cast_137")]; + int32 end_step_15 = add(x = offset, y = gather_74_cast_uint16_to_int32)[name = string("end_step_15")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_98 = const()[name = string("expand_dims_98"), val = tensor([0])]; + tensor expand_dims_99_axes_0 = const()[name = string("expand_dims_99_axes_0"), val = tensor([0])]; + tensor expand_dims_99 = expand_dims(axes = expand_dims_99_axes_0, x = end_step_15)[name = string("expand_dims_99")]; + tensor concat_136_values0_0 = const()[name = string("concat_136_values0_0"), val = tensor([6])]; + int32 concat_136_axis_0 = const()[name = string("concat_136_axis_0"), val = int32(0)]; + bool concat_136_interleave_0 = const()[name = string("concat_136_interleave_0"), val = bool(false)]; + tensor concat_136 = concat(axis = concat_136_axis_0, interleave = concat_136_interleave_0, values = (concat_136_values0_0, expand_dims_96, expand_dims_1, expand_dims_98))[name = string("concat_136")]; + tensor concat_137_values0_0 = const()[name = string("concat_137_values0_0"), val = tensor([0])]; + tensor concat_137_values1_0 = const()[name = string("concat_137_values1_0"), val = tensor([0])]; + tensor concat_137_values3_0 = const()[name = string("concat_137_values3_0"), val = tensor([0])]; + int32 concat_137_axis_0 = const()[name = string("concat_137_axis_0"), val = int32(0)]; + bool concat_137_interleave_0 = const()[name = string("concat_137_interleave_0"), val = bool(false)]; + tensor concat_137 = concat(axis = concat_137_axis_0, interleave = concat_137_interleave_0, values = (concat_137_values0_0, concat_137_values1_0, expand_dims_99, concat_137_values3_0))[name = string("concat_137")]; + tensor k_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = k_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = k_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_7_stride_0, update = linear_49_cast_fp16, x = coreml_update_state_34)[name = string("k_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_7_cast_fp16, input = k_cache1)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_36 = read_state(input = k_cache1)[name = string("coreml_update_state_36")]; + tensor v_cache1_internal_tensor_assign_7_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_7_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_136, begin_mask = v_cache1_internal_tensor_assign_7_begin_mask_0, end = concat_137, end_mask = v_cache1_internal_tensor_assign_7_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_7_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_7_stride_0, update = linear_50_cast_fp16, x = coreml_update_state_35)[name = string("v_cache1_internal_tensor_assign_7_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_7_cast_fp16, input = v_cache1)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_37 = read_state(input = v_cache1)[name = string("coreml_update_state_37")]; + int32 concat_142_values0_0 = const()[name = string("concat_142_values0_0"), val = int32(1)]; + int32 concat_142_values2_0 = const()[name = string("concat_142_values2_0"), val = int32(768)]; + int32 concat_142_axis_0 = const()[name = string("concat_142_axis_0"), val = int32(0)]; + bool concat_142_interleave_0 = const()[name = string("concat_142_interleave_0"), val = bool(false)]; + tensor concat_142 = concat(axis = concat_142_axis_0, interleave = concat_142_interleave_0, values = (concat_142_values0_0, end_step_15, concat_142_values2_0))[name = string("concat_142")]; + tensor var_1422_begin_0 = const()[name = string("op_1422_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1422_end_mask_0 = const()[name = string("op_1422_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1422_cast_fp16 = slice_by_index(begin = var_1422_begin_0, end = concat_142, end_mask = var_1422_end_mask_0, x = k_cache_25_cast_fp16)[name = string("op_1422_cast_fp16")]; + tensor var_1425_begin_0 = const()[name = string("op_1425_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1425_end_mask_0 = const()[name = string("op_1425_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1425_cast_fp16 = slice_by_index(begin = var_1425_begin_0, end = concat_142, end_mask = var_1425_end_mask_0, x = v_cache_25_cast_fp16)[name = string("op_1425_cast_fp16")]; + tensor concat_144x = const()[name = string("concat_144x"), val = tensor([1, -1, 12, 64])]; + tensor var_1435_cast_fp16 = reshape(shape = concat_144x, x = linear_48_cast_fp16)[name = string("op_1435_cast_fp16")]; + tensor const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_51_cast_fp16 = mul(x = var_1435_cast_fp16, y = const_84_to_fp16)[name = string("q_51_cast_fp16")]; + tensor concat_145x = const()[name = string("concat_145x"), val = tensor([1, -1, 12, 64])]; + tensor var_1442_cast_fp16 = reshape(shape = concat_145x, x = var_1422_cast_fp16)[name = string("op_1442_cast_fp16")]; + tensor const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_65_cast_fp16 = mul(x = var_1442_cast_fp16, y = const_85_to_fp16)[name = string("k_65_cast_fp16")]; + tensor concat_146x = const()[name = string("concat_146x"), val = tensor([1, -1, 12, 64])]; + tensor var_1449_cast_fp16 = reshape(shape = concat_146x, x = var_1425_cast_fp16)[name = string("op_1449_cast_fp16")]; + tensor var_1450 = const()[name = string("op_1450"), val = tensor([0, 2, 1, 3])]; + bool qk_37_transpose_x_0 = const()[name = string("qk_37_transpose_x_0"), val = bool(false)]; + bool qk_37_transpose_y_0 = const()[name = string("qk_37_transpose_y_0"), val = bool(false)]; + tensor transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = k_65_cast_fp16)[name = string("transpose_190")]; + tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = q_51_cast_fp16)[name = string("transpose_191")]; + tensor qk_37_cast_fp16 = matmul(transpose_x = qk_37_transpose_x_0, transpose_y = qk_37_transpose_y_0, x = transpose_121, y = transpose_122)[name = string("qk_37_cast_fp16")]; + int32 concat_147_values1_0 = const()[name = string("concat_147_values1_0"), val = int32(448)]; + int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; + bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; + tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_74_cast_uint16_to_int32, concat_147_values1_0))[name = string("concat_147")]; + tensor var_1453_begin_0 = const()[name = string("op_1453_begin_0"), val = tensor([0, 0])]; + tensor var_1453_end_mask_0 = const()[name = string("op_1453_end_mask_0"), val = tensor([false, true])]; + tensor var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = concat_147, end_mask = var_1453_end_mask_0, x = mask_to_fp16)[name = string("op_1453_cast_fp16")]; + int32 concat_148_values0_0 = const()[name = string("concat_148_values0_0"), val = int32(0)]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (concat_148_values0_0, gather_74_cast_uint16_to_int32))[name = string("concat_148")]; + tensor var_1454_begin_0 = const()[name = string("op_1454_begin_0"), val = tensor([0, 0])]; + tensor var_1454_end_mask_0 = const()[name = string("op_1454_end_mask_0"), val = tensor([true, false])]; + tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = concat_148, end_mask = var_1454_end_mask_0, x = var_1453_cast_fp16)[name = string("op_1454_cast_fp16")]; + tensor qk_39_cast_fp16 = add(x = qk_37_cast_fp16, y = var_1454_cast_fp16)[name = string("qk_39_cast_fp16")]; + tensor var_1457_cast_fp16 = softmax(axis = var_1366, x = qk_39_cast_fp16)[name = string("op_1457_cast_fp16")]; + bool var_1459_transpose_x_0 = const()[name = string("op_1459_transpose_x_0"), val = bool(false)]; + bool var_1459_transpose_y_0 = const()[name = string("op_1459_transpose_y_0"), val = bool(false)]; + tensor v_65_cast_fp16 = transpose(perm = var_1450, x = var_1449_cast_fp16)[name = string("transpose_192")]; + tensor var_1459_cast_fp16 = matmul(transpose_x = var_1459_transpose_x_0, transpose_y = var_1459_transpose_y_0, x = var_1457_cast_fp16, y = v_65_cast_fp16)[name = string("op_1459_cast_fp16")]; + tensor var_1460 = const()[name = string("op_1460"), val = tensor([0, 2, 1, 3])]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, -1, 768])]; + tensor var_1461_cast_fp16 = transpose(perm = var_1460, x = var_1459_cast_fp16)[name = string("transpose_189")]; + tensor x_115_cast_fp16 = reshape(shape = concat_149x, x = var_1461_cast_fp16)[name = string("x_115_cast_fp16")]; + tensor var_1465_to_fp16 = const()[name = string("op_1465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185851584)))]; + tensor var_1466_to_fp16 = const()[name = string("op_1466_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187031296)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1466_to_fp16, weight = var_1465_to_fp16, x = x_115_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_117_cast_fp16 = add(x = x_111_cast_fp16, y = linear_51_cast_fp16)[name = string("x_117_cast_fp16")]; + tensor var_1473_axes_0 = const()[name = string("op_1473_axes_0"), val = tensor([-1])]; + tensor blocks_6_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187032896)))]; + tensor blocks_6_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187034496)))]; + tensor var_1473_cast_fp16 = layer_norm(axes = var_1473_axes_0, beta = blocks_6_cross_attn_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_cross_attn_ln_weight_to_fp16, x = x_117_cast_fp16)[name = string("op_1473_cast_fp16")]; + tensor var_1482_to_fp16 = const()[name = string("op_1482_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187036096)))]; + tensor var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188215808)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1483_to_fp16, weight = var_1482_to_fp16, x = var_1473_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor concat_150 = const()[name = string("concat_150"), val = tensor([0, 0, 0])]; + tensor concat_151 = const()[name = string("concat_151"), val = tensor([0, 1500, 0])]; + tensor k_67_internal_tensor_assign_1_stride_0 = const()[name = string("k_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_150, begin_mask = k_67_internal_tensor_assign_1_begin_mask_0, end = concat_151, end_mask = k_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_67_internal_tensor_assign_1_squeeze_mask_0, stride = k_67_internal_tensor_assign_1_stride_0, update = k_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("k_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_152 = const()[name = string("concat_152"), val = tensor([0, 0, 0])]; + tensor concat_153 = const()[name = string("concat_153"), val = tensor([0, 1500, 0])]; + tensor v_67_internal_tensor_assign_1_stride_0 = const()[name = string("v_67_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_67_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_67_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_67_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_67_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_152, begin_mask = v_67_internal_tensor_assign_1_begin_mask_0, end = concat_153, end_mask = v_67_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_67_internal_tensor_assign_1_squeeze_mask_0, stride = v_67_internal_tensor_assign_1_stride_0, update = v_cache_27_cast_fp16, x = k_7_to_fp16)[name = string("v_67_internal_tensor_assign_1_cast_fp16")]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 12, 64])]; + tensor var_1503_cast_fp16 = reshape(shape = concat_154x, x = linear_52_cast_fp16)[name = string("op_1503_cast_fp16")]; + tensor const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_55_cast_fp16 = mul(x = var_1503_cast_fp16, y = const_86_to_fp16)[name = string("q_55_cast_fp16")]; + tensor var_1509 = const()[name = string("op_1509"), val = tensor([1, 1500, 12, -1])]; + tensor var_1510_cast_fp16 = reshape(shape = var_1509, x = k_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1510_cast_fp16")]; + tensor const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_69_cast_fp16 = mul(x = var_1510_cast_fp16, y = const_87_to_fp16)[name = string("k_69_cast_fp16")]; + tensor var_1516 = const()[name = string("op_1516"), val = tensor([1, 1500, 12, -1])]; + tensor var_1517_cast_fp16 = reshape(shape = var_1516, x = v_67_internal_tensor_assign_1_cast_fp16)[name = string("op_1517_cast_fp16")]; + tensor var_1518 = const()[name = string("op_1518"), val = tensor([0, 2, 1, 3])]; + bool qk_41_transpose_x_0 = const()[name = string("qk_41_transpose_x_0"), val = bool(false)]; + bool qk_41_transpose_y_0 = const()[name = string("qk_41_transpose_y_0"), val = bool(false)]; + tensor transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = k_69_cast_fp16)[name = string("transpose_186")]; + tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = q_55_cast_fp16)[name = string("transpose_187")]; + tensor qk_41_cast_fp16 = matmul(transpose_x = qk_41_transpose_x_0, transpose_y = qk_41_transpose_y_0, x = transpose_123, y = transpose_124)[name = string("qk_41_cast_fp16")]; + tensor var_1522_cast_fp16 = softmax(axis = var_1366, x = qk_41_cast_fp16)[name = string("op_1522_cast_fp16")]; + bool var_1524_transpose_x_0 = const()[name = string("op_1524_transpose_x_0"), val = bool(false)]; + bool var_1524_transpose_y_0 = const()[name = string("op_1524_transpose_y_0"), val = bool(false)]; + tensor v_69_cast_fp16 = transpose(perm = var_1518, x = var_1517_cast_fp16)[name = string("transpose_188")]; + tensor var_1524_cast_fp16 = matmul(transpose_x = var_1524_transpose_x_0, transpose_y = var_1524_transpose_y_0, x = var_1522_cast_fp16, y = v_69_cast_fp16)[name = string("op_1524_cast_fp16")]; + tensor var_1525 = const()[name = string("op_1525"), val = tensor([0, 2, 1, 3])]; + tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 768])]; + tensor var_1526_cast_fp16 = transpose(perm = var_1525, x = var_1524_cast_fp16)[name = string("transpose_185")]; + tensor x_121_cast_fp16 = reshape(shape = concat_155x, x = var_1526_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1530_to_fp16 = const()[name = string("op_1530_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188217408)))]; + tensor var_1531_to_fp16 = const()[name = string("op_1531_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189397120)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1531_to_fp16, weight = var_1530_to_fp16, x = x_121_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_123_cast_fp16 = add(x = x_117_cast_fp16, y = linear_53_cast_fp16)[name = string("x_123_cast_fp16")]; + tensor var_1538_axes_0 = const()[name = string("op_1538_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189398720)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189400320)))]; + tensor var_1538_cast_fp16 = layer_norm(axes = var_1538_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_1372_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_123_cast_fp16)[name = string("op_1538_cast_fp16")]; + tensor var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189401920)))]; + tensor var_1548_to_fp16 = const()[name = string("op_1548_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194120576)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1548_to_fp16, weight = var_1547_to_fp16, x = var_1538_cast_fp16)[name = string("linear_54_cast_fp16")]; + string x_127_mode_0 = const()[name = string("x_127_mode_0"), val = string("EXACT")]; + tensor x_127_cast_fp16 = gelu(mode = x_127_mode_0, x = linear_54_cast_fp16)[name = string("x_127_cast_fp16")]; + tensor var_1553_to_fp16 = const()[name = string("op_1553_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194126784)))]; + tensor var_1554_to_fp16 = const()[name = string("op_1554_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198845440)))]; + tensor linear_55_cast_fp16 = linear(bias = var_1554_to_fp16, weight = var_1553_to_fp16, x = x_127_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_55_cast_fp16)[name = string("x_129_cast_fp16")]; + tensor k_cache_29_begin_0 = const()[name = string("k_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_29_end_0 = const()[name = string("k_cache_29_end_0"), val = tensor([8, 1, 448, 768])]; + tensor k_cache_29_end_mask_0 = const()[name = string("k_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_29_squeeze_mask_0 = const()[name = string("k_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_29_cast_fp16 = slice_by_index(begin = k_cache_29_begin_0, end = k_cache_29_end_0, end_mask = k_cache_29_end_mask_0, squeeze_mask = k_cache_29_squeeze_mask_0, x = coreml_update_state_36)[name = string("k_cache_29_cast_fp16")]; + tensor v_cache_29_begin_0 = const()[name = string("v_cache_29_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_29_end_0 = const()[name = string("v_cache_29_end_0"), val = tensor([8, 1, 448, 768])]; + tensor v_cache_29_end_mask_0 = const()[name = string("v_cache_29_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_29_squeeze_mask_0 = const()[name = string("v_cache_29_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_29_cast_fp16 = slice_by_index(begin = v_cache_29_begin_0, end = v_cache_29_end_0, end_mask = v_cache_29_end_mask_0, squeeze_mask = v_cache_29_squeeze_mask_0, x = coreml_update_state_37)[name = string("v_cache_29_cast_fp16")]; + tensor k_cache_31_begin_0 = const()[name = string("k_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor k_cache_31_end_0 = const()[name = string("k_cache_31_end_0"), val = tensor([8, 1, 1500, 768])]; + tensor k_cache_31_end_mask_0 = const()[name = string("k_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_31_squeeze_mask_0 = const()[name = string("k_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_31_cast_fp16 = slice_by_index(begin = k_cache_31_begin_0, end = k_cache_31_end_0, end_mask = k_cache_31_end_mask_0, squeeze_mask = k_cache_31_squeeze_mask_0, x = read_state_2)[name = string("k_cache_31_cast_fp16")]; + tensor v_cache_31_begin_0 = const()[name = string("v_cache_31_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor v_cache_31_end_0 = const()[name = string("v_cache_31_end_0"), val = tensor([8, 1, 1500, 768])]; + tensor v_cache_31_end_mask_0 = const()[name = string("v_cache_31_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_31_squeeze_mask_0 = const()[name = string("v_cache_31_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_31_cast_fp16 = slice_by_index(begin = v_cache_31_begin_0, end = v_cache_31_end_0, end_mask = v_cache_31_end_mask_0, squeeze_mask = v_cache_31_squeeze_mask_0, x = read_state_3)[name = string("v_cache_31_cast_fp16")]; + int32 var_1577 = const()[name = string("op_1577"), val = int32(-1)]; + tensor var_1595_axes_0 = const()[name = string("op_1595_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198847040)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198848640)))]; + fp16 var_1583_to_fp16 = const()[name = string("op_1583_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1595_cast_fp16 = layer_norm(axes = var_1595_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_129_cast_fp16)[name = string("op_1595_cast_fp16")]; + tensor var_1606_to_fp16 = const()[name = string("op_1606_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198850240)))]; + tensor var_1607_to_fp16 = const()[name = string("op_1607_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200029952)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1607_to_fp16, weight = var_1606_to_fp16, x = var_1595_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1610_to_fp16 = const()[name = string("op_1610_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200031552)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1610_to_fp16, x = var_1595_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor var_1614_to_fp16 = const()[name = string("op_1614_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201211264)))]; + tensor var_1615_to_fp16 = const()[name = string("op_1615_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202390976)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1615_to_fp16, weight = var_1614_to_fp16, x = var_1595_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor var_1617_shape_cast_fp16 = shape(x = linear_56_cast_fp16)[name = string("op_1617_shape_cast_fp16")]; + int32 gather_86_axis_0 = const()[name = string("gather_86_axis_0"), val = int32(0)]; + int32 gather_86_batch_dims_0 = const()[name = string("gather_86_batch_dims_0"), val = int32(0)]; + bool gather_86_validate_indices_0 = const()[name = string("gather_86_validate_indices_0"), val = bool(false)]; + string var_1617_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1617_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_86_to_uint16 = const()[name = string("select_86_to_uint16"), val = uint16(1)]; + tensor var_1617_shape_cast_fp16_to_uint16 = cast(dtype = var_1617_shape_cast_fp16_to_uint16_dtype_0, x = var_1617_shape_cast_fp16)[name = string("cast_136")]; + uint16 gather_86_cast_uint16 = gather(axis = gather_86_axis_0, batch_dims = gather_86_batch_dims_0, indices = select_86_to_uint16, validate_indices = gather_86_validate_indices_0, x = var_1617_shape_cast_fp16_to_uint16)[name = string("gather_86_cast_uint16")]; + string gather_86_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_86_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_86_cast_uint16_to_int32 = cast(dtype = gather_86_cast_uint16_to_int32_dtype_0, x = gather_86_cast_uint16)[name = string("cast_135")]; + int32 end_step_17 = add(x = offset, y = gather_86_cast_uint16_to_int32)[name = string("end_step_17")]; + tensor expand_dims_112 = const()[name = string("expand_dims_112"), val = tensor([0])]; + tensor expand_dims_114 = const()[name = string("expand_dims_114"), val = tensor([0])]; + tensor expand_dims_115_axes_0 = const()[name = string("expand_dims_115_axes_0"), val = tensor([0])]; + tensor expand_dims_115 = expand_dims(axes = expand_dims_115_axes_0, x = end_step_17)[name = string("expand_dims_115")]; + tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([7])]; + int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; + bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; + tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_112, expand_dims_1, expand_dims_114))[name = string("concat_158")]; + tensor concat_159_values0_0 = const()[name = string("concat_159_values0_0"), val = tensor([0])]; + tensor concat_159_values1_0 = const()[name = string("concat_159_values1_0"), val = tensor([0])]; + tensor concat_159_values3_0 = const()[name = string("concat_159_values3_0"), val = tensor([0])]; + int32 concat_159_axis_0 = const()[name = string("concat_159_axis_0"), val = int32(0)]; + bool concat_159_interleave_0 = const()[name = string("concat_159_interleave_0"), val = bool(false)]; + tensor concat_159 = concat(axis = concat_159_axis_0, interleave = concat_159_interleave_0, values = (concat_159_values0_0, concat_159_values1_0, expand_dims_115, concat_159_values3_0))[name = string("concat_159")]; + tensor k_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = k_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = k_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_8_stride_0, update = linear_57_cast_fp16, x = coreml_update_state_36)[name = string("k_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_8_cast_fp16, input = k_cache1)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_38 = read_state(input = k_cache1)[name = string("coreml_update_state_38")]; + tensor v_cache1_internal_tensor_assign_8_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_8_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_158, begin_mask = v_cache1_internal_tensor_assign_8_begin_mask_0, end = concat_159, end_mask = v_cache1_internal_tensor_assign_8_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_8_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_8_stride_0, update = linear_58_cast_fp16, x = coreml_update_state_37)[name = string("v_cache1_internal_tensor_assign_8_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_8_cast_fp16, input = v_cache1)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_39 = read_state(input = v_cache1)[name = string("coreml_update_state_39")]; + int32 concat_164_values0_0 = const()[name = string("concat_164_values0_0"), val = int32(1)]; + int32 concat_164_values2_0 = const()[name = string("concat_164_values2_0"), val = int32(768)]; + int32 concat_164_axis_0 = const()[name = string("concat_164_axis_0"), val = int32(0)]; + bool concat_164_interleave_0 = const()[name = string("concat_164_interleave_0"), val = bool(false)]; + tensor concat_164 = concat(axis = concat_164_axis_0, interleave = concat_164_interleave_0, values = (concat_164_values0_0, end_step_17, concat_164_values2_0))[name = string("concat_164")]; + tensor var_1633_begin_0 = const()[name = string("op_1633_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1633_end_mask_0 = const()[name = string("op_1633_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1633_cast_fp16 = slice_by_index(begin = var_1633_begin_0, end = concat_164, end_mask = var_1633_end_mask_0, x = k_cache_29_cast_fp16)[name = string("op_1633_cast_fp16")]; + tensor var_1636_begin_0 = const()[name = string("op_1636_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1636_end_mask_0 = const()[name = string("op_1636_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1636_cast_fp16 = slice_by_index(begin = var_1636_begin_0, end = concat_164, end_mask = var_1636_end_mask_0, x = v_cache_29_cast_fp16)[name = string("op_1636_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, -1, 12, 64])]; + tensor var_1646_cast_fp16 = reshape(shape = concat_166x, x = linear_56_cast_fp16)[name = string("op_1646_cast_fp16")]; + tensor const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_59_cast_fp16 = mul(x = var_1646_cast_fp16, y = const_88_to_fp16)[name = string("q_59_cast_fp16")]; + tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, -1, 12, 64])]; + tensor var_1653_cast_fp16 = reshape(shape = concat_167x, x = var_1633_cast_fp16)[name = string("op_1653_cast_fp16")]; + tensor const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_75_cast_fp16 = mul(x = var_1653_cast_fp16, y = const_89_to_fp16)[name = string("k_75_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, -1, 12, 64])]; + tensor var_1660_cast_fp16 = reshape(shape = concat_168x, x = var_1636_cast_fp16)[name = string("op_1660_cast_fp16")]; + tensor var_1661 = const()[name = string("op_1661"), val = tensor([0, 2, 1, 3])]; + bool qk_43_transpose_x_0 = const()[name = string("qk_43_transpose_x_0"), val = bool(false)]; + bool qk_43_transpose_y_0 = const()[name = string("qk_43_transpose_y_0"), val = bool(false)]; + tensor transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = k_75_cast_fp16)[name = string("transpose_182")]; + tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = q_59_cast_fp16)[name = string("transpose_183")]; + tensor qk_43_cast_fp16 = matmul(transpose_x = qk_43_transpose_x_0, transpose_y = qk_43_transpose_y_0, x = transpose_125, y = transpose_126)[name = string("qk_43_cast_fp16")]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(448)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (gather_86_cast_uint16_to_int32, concat_169_values1_0))[name = string("concat_169")]; + tensor var_1664_begin_0 = const()[name = string("op_1664_begin_0"), val = tensor([0, 0])]; + tensor var_1664_end_mask_0 = const()[name = string("op_1664_end_mask_0"), val = tensor([false, true])]; + tensor var_1664_cast_fp16 = slice_by_index(begin = var_1664_begin_0, end = concat_169, end_mask = var_1664_end_mask_0, x = mask_to_fp16)[name = string("op_1664_cast_fp16")]; + int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(0)]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, gather_86_cast_uint16_to_int32))[name = string("concat_170")]; + tensor var_1665_begin_0 = const()[name = string("op_1665_begin_0"), val = tensor([0, 0])]; + tensor var_1665_end_mask_0 = const()[name = string("op_1665_end_mask_0"), val = tensor([true, false])]; + tensor var_1665_cast_fp16 = slice_by_index(begin = var_1665_begin_0, end = concat_170, end_mask = var_1665_end_mask_0, x = var_1664_cast_fp16)[name = string("op_1665_cast_fp16")]; + tensor qk_45_cast_fp16 = add(x = qk_43_cast_fp16, y = var_1665_cast_fp16)[name = string("qk_45_cast_fp16")]; + tensor var_1668_cast_fp16 = softmax(axis = var_1577, x = qk_45_cast_fp16)[name = string("op_1668_cast_fp16")]; + bool var_1670_transpose_x_0 = const()[name = string("op_1670_transpose_x_0"), val = bool(false)]; + bool var_1670_transpose_y_0 = const()[name = string("op_1670_transpose_y_0"), val = bool(false)]; + tensor v_75_cast_fp16 = transpose(perm = var_1661, x = var_1660_cast_fp16)[name = string("transpose_184")]; + tensor var_1670_cast_fp16 = matmul(transpose_x = var_1670_transpose_x_0, transpose_y = var_1670_transpose_y_0, x = var_1668_cast_fp16, y = v_75_cast_fp16)[name = string("op_1670_cast_fp16")]; + tensor var_1671 = const()[name = string("op_1671"), val = tensor([0, 2, 1, 3])]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 768])]; + tensor var_1672_cast_fp16 = transpose(perm = var_1671, x = var_1670_cast_fp16)[name = string("transpose_181")]; + tensor x_133_cast_fp16 = reshape(shape = concat_171x, x = var_1672_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1676_to_fp16 = const()[name = string("op_1676_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202392576)))]; + tensor var_1677_to_fp16 = const()[name = string("op_1677_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203572288)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1677_to_fp16, weight = var_1676_to_fp16, x = x_133_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_135_cast_fp16 = add(x = x_129_cast_fp16, y = linear_59_cast_fp16)[name = string("x_135_cast_fp16")]; + tensor var_1684_axes_0 = const()[name = string("op_1684_axes_0"), val = tensor([-1])]; + tensor blocks_7_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203573888)))]; + tensor blocks_7_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203575488)))]; + tensor var_1684_cast_fp16 = layer_norm(axes = var_1684_axes_0, beta = blocks_7_cross_attn_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_cross_attn_ln_weight_to_fp16, x = x_135_cast_fp16)[name = string("op_1684_cast_fp16")]; + tensor var_1693_to_fp16 = const()[name = string("op_1693_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203577088)))]; + tensor var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204756800)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1694_to_fp16, weight = var_1693_to_fp16, x = var_1684_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor concat_172 = const()[name = string("concat_172"), val = tensor([0, 0, 0])]; + tensor concat_173 = const()[name = string("concat_173"), val = tensor([0, 1500, 0])]; + tensor k_77_internal_tensor_assign_1_stride_0 = const()[name = string("k_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_172, begin_mask = k_77_internal_tensor_assign_1_begin_mask_0, end = concat_173, end_mask = k_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_77_internal_tensor_assign_1_squeeze_mask_0, stride = k_77_internal_tensor_assign_1_stride_0, update = k_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("k_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_174 = const()[name = string("concat_174"), val = tensor([0, 0, 0])]; + tensor concat_175 = const()[name = string("concat_175"), val = tensor([0, 1500, 0])]; + tensor v_77_internal_tensor_assign_1_stride_0 = const()[name = string("v_77_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_77_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_77_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_77_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_77_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_174, begin_mask = v_77_internal_tensor_assign_1_begin_mask_0, end = concat_175, end_mask = v_77_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_77_internal_tensor_assign_1_squeeze_mask_0, stride = v_77_internal_tensor_assign_1_stride_0, update = v_cache_31_cast_fp16, x = k_7_to_fp16)[name = string("v_77_internal_tensor_assign_1_cast_fp16")]; + tensor concat_176x = const()[name = string("concat_176x"), val = tensor([1, -1, 12, 64])]; + tensor var_1714_cast_fp16 = reshape(shape = concat_176x, x = linear_60_cast_fp16)[name = string("op_1714_cast_fp16")]; + tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_63_cast_fp16 = mul(x = var_1714_cast_fp16, y = const_90_to_fp16)[name = string("q_63_cast_fp16")]; + tensor var_1720 = const()[name = string("op_1720"), val = tensor([1, 1500, 12, -1])]; + tensor var_1721_cast_fp16 = reshape(shape = var_1720, x = k_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1721_cast_fp16")]; + tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_79_cast_fp16 = mul(x = var_1721_cast_fp16, y = const_91_to_fp16)[name = string("k_79_cast_fp16")]; + tensor var_1727 = const()[name = string("op_1727"), val = tensor([1, 1500, 12, -1])]; + tensor var_1728_cast_fp16 = reshape(shape = var_1727, x = v_77_internal_tensor_assign_1_cast_fp16)[name = string("op_1728_cast_fp16")]; + tensor var_1729 = const()[name = string("op_1729"), val = tensor([0, 2, 1, 3])]; + bool qk_47_transpose_x_0 = const()[name = string("qk_47_transpose_x_0"), val = bool(false)]; + bool qk_47_transpose_y_0 = const()[name = string("qk_47_transpose_y_0"), val = bool(false)]; + tensor transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = k_79_cast_fp16)[name = string("transpose_178")]; + tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = q_63_cast_fp16)[name = string("transpose_179")]; + tensor qk_47_cast_fp16 = matmul(transpose_x = qk_47_transpose_x_0, transpose_y = qk_47_transpose_y_0, x = transpose_127, y = transpose_128)[name = string("qk_47_cast_fp16")]; + tensor var_1733_cast_fp16 = softmax(axis = var_1577, x = qk_47_cast_fp16)[name = string("op_1733_cast_fp16")]; + bool var_1735_transpose_x_0 = const()[name = string("op_1735_transpose_x_0"), val = bool(false)]; + bool var_1735_transpose_y_0 = const()[name = string("op_1735_transpose_y_0"), val = bool(false)]; + tensor v_79_cast_fp16 = transpose(perm = var_1729, x = var_1728_cast_fp16)[name = string("transpose_180")]; + tensor var_1735_cast_fp16 = matmul(transpose_x = var_1735_transpose_x_0, transpose_y = var_1735_transpose_y_0, x = var_1733_cast_fp16, y = v_79_cast_fp16)[name = string("op_1735_cast_fp16")]; + tensor var_1736 = const()[name = string("op_1736"), val = tensor([0, 2, 1, 3])]; + tensor concat_177x = const()[name = string("concat_177x"), val = tensor([1, -1, 768])]; + tensor var_1737_cast_fp16 = transpose(perm = var_1736, x = var_1735_cast_fp16)[name = string("transpose_177")]; + tensor x_139_cast_fp16 = reshape(shape = concat_177x, x = var_1737_cast_fp16)[name = string("x_139_cast_fp16")]; + tensor var_1741_to_fp16 = const()[name = string("op_1741_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204758400)))]; + tensor var_1742_to_fp16 = const()[name = string("op_1742_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205938112)))]; + tensor linear_61_cast_fp16 = linear(bias = var_1742_to_fp16, weight = var_1741_to_fp16, x = x_139_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor x_141_cast_fp16 = add(x = x_135_cast_fp16, y = linear_61_cast_fp16)[name = string("x_141_cast_fp16")]; + tensor var_1749_axes_0 = const()[name = string("op_1749_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205939712)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205941312)))]; + tensor var_1749_cast_fp16 = layer_norm(axes = var_1749_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_1583_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_141_cast_fp16)[name = string("op_1749_cast_fp16")]; + tensor var_1758_to_fp16 = const()[name = string("op_1758_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205942912)))]; + tensor var_1759_to_fp16 = const()[name = string("op_1759_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210661568)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1759_to_fp16, weight = var_1758_to_fp16, x = var_1749_cast_fp16)[name = string("linear_62_cast_fp16")]; + string x_145_mode_0 = const()[name = string("x_145_mode_0"), val = string("EXACT")]; + tensor x_145_cast_fp16 = gelu(mode = x_145_mode_0, x = linear_62_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1764_to_fp16 = const()[name = string("op_1764_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210667776)))]; + tensor var_1765_to_fp16 = const()[name = string("op_1765_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215386432)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1765_to_fp16, weight = var_1764_to_fp16, x = x_145_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_147_cast_fp16 = add(x = x_141_cast_fp16, y = linear_63_cast_fp16)[name = string("x_147_cast_fp16")]; + tensor k_cache_33_begin_0 = const()[name = string("k_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_33_end_0 = const()[name = string("k_cache_33_end_0"), val = tensor([9, 1, 448, 768])]; + tensor k_cache_33_end_mask_0 = const()[name = string("k_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_33_squeeze_mask_0 = const()[name = string("k_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_33_cast_fp16 = slice_by_index(begin = k_cache_33_begin_0, end = k_cache_33_end_0, end_mask = k_cache_33_end_mask_0, squeeze_mask = k_cache_33_squeeze_mask_0, x = coreml_update_state_38)[name = string("k_cache_33_cast_fp16")]; + tensor v_cache_33_begin_0 = const()[name = string("v_cache_33_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_33_end_0 = const()[name = string("v_cache_33_end_0"), val = tensor([9, 1, 448, 768])]; + tensor v_cache_33_end_mask_0 = const()[name = string("v_cache_33_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_33_squeeze_mask_0 = const()[name = string("v_cache_33_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_33_cast_fp16 = slice_by_index(begin = v_cache_33_begin_0, end = v_cache_33_end_0, end_mask = v_cache_33_end_mask_0, squeeze_mask = v_cache_33_squeeze_mask_0, x = coreml_update_state_39)[name = string("v_cache_33_cast_fp16")]; + tensor k_cache_35_begin_0 = const()[name = string("k_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor k_cache_35_end_0 = const()[name = string("k_cache_35_end_0"), val = tensor([9, 1, 1500, 768])]; + tensor k_cache_35_end_mask_0 = const()[name = string("k_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_35_squeeze_mask_0 = const()[name = string("k_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_35_cast_fp16 = slice_by_index(begin = k_cache_35_begin_0, end = k_cache_35_end_0, end_mask = k_cache_35_end_mask_0, squeeze_mask = k_cache_35_squeeze_mask_0, x = read_state_2)[name = string("k_cache_35_cast_fp16")]; + tensor v_cache_35_begin_0 = const()[name = string("v_cache_35_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor v_cache_35_end_0 = const()[name = string("v_cache_35_end_0"), val = tensor([9, 1, 1500, 768])]; + tensor v_cache_35_end_mask_0 = const()[name = string("v_cache_35_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_35_squeeze_mask_0 = const()[name = string("v_cache_35_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_35_cast_fp16 = slice_by_index(begin = v_cache_35_begin_0, end = v_cache_35_end_0, end_mask = v_cache_35_end_mask_0, squeeze_mask = v_cache_35_squeeze_mask_0, x = read_state_3)[name = string("v_cache_35_cast_fp16")]; + int32 var_1788 = const()[name = string("op_1788"), val = int32(-1)]; + tensor var_1806_axes_0 = const()[name = string("op_1806_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215388032)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215389632)))]; + fp16 var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1806_cast_fp16 = layer_norm(axes = var_1806_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_147_cast_fp16)[name = string("op_1806_cast_fp16")]; + tensor var_1817_to_fp16 = const()[name = string("op_1817_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215391232)))]; + tensor var_1818_to_fp16 = const()[name = string("op_1818_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216570944)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1818_to_fp16, weight = var_1817_to_fp16, x = var_1806_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor var_1821_to_fp16 = const()[name = string("op_1821_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216572544)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1821_to_fp16, x = var_1806_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217752256)))]; + tensor var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218931968)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1826_to_fp16, weight = var_1825_to_fp16, x = var_1806_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1828_shape_cast_fp16 = shape(x = linear_64_cast_fp16)[name = string("op_1828_shape_cast_fp16")]; + int32 gather_98_axis_0 = const()[name = string("gather_98_axis_0"), val = int32(0)]; + int32 gather_98_batch_dims_0 = const()[name = string("gather_98_batch_dims_0"), val = int32(0)]; + bool gather_98_validate_indices_0 = const()[name = string("gather_98_validate_indices_0"), val = bool(false)]; + string var_1828_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1828_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_98_to_uint16 = const()[name = string("select_98_to_uint16"), val = uint16(1)]; + tensor var_1828_shape_cast_fp16_to_uint16 = cast(dtype = var_1828_shape_cast_fp16_to_uint16_dtype_0, x = var_1828_shape_cast_fp16)[name = string("cast_134")]; + uint16 gather_98_cast_uint16 = gather(axis = gather_98_axis_0, batch_dims = gather_98_batch_dims_0, indices = select_98_to_uint16, validate_indices = gather_98_validate_indices_0, x = var_1828_shape_cast_fp16_to_uint16)[name = string("gather_98_cast_uint16")]; + string gather_98_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_98_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_98_cast_uint16_to_int32 = cast(dtype = gather_98_cast_uint16_to_int32_dtype_0, x = gather_98_cast_uint16)[name = string("cast_133")]; + int32 end_step_19 = add(x = offset, y = gather_98_cast_uint16_to_int32)[name = string("end_step_19")]; + tensor expand_dims_128 = const()[name = string("expand_dims_128"), val = tensor([0])]; + tensor expand_dims_130 = const()[name = string("expand_dims_130"), val = tensor([0])]; + tensor expand_dims_131_axes_0 = const()[name = string("expand_dims_131_axes_0"), val = tensor([0])]; + tensor expand_dims_131 = expand_dims(axes = expand_dims_131_axes_0, x = end_step_19)[name = string("expand_dims_131")]; + tensor concat_180_values0_0 = const()[name = string("concat_180_values0_0"), val = tensor([8])]; + int32 concat_180_axis_0 = const()[name = string("concat_180_axis_0"), val = int32(0)]; + bool concat_180_interleave_0 = const()[name = string("concat_180_interleave_0"), val = bool(false)]; + tensor concat_180 = concat(axis = concat_180_axis_0, interleave = concat_180_interleave_0, values = (concat_180_values0_0, expand_dims_128, expand_dims_1, expand_dims_130))[name = string("concat_180")]; + tensor concat_181_values0_0 = const()[name = string("concat_181_values0_0"), val = tensor([0])]; + tensor concat_181_values1_0 = const()[name = string("concat_181_values1_0"), val = tensor([0])]; + tensor concat_181_values3_0 = const()[name = string("concat_181_values3_0"), val = tensor([0])]; + int32 concat_181_axis_0 = const()[name = string("concat_181_axis_0"), val = int32(0)]; + bool concat_181_interleave_0 = const()[name = string("concat_181_interleave_0"), val = bool(false)]; + tensor concat_181 = concat(axis = concat_181_axis_0, interleave = concat_181_interleave_0, values = (concat_181_values0_0, concat_181_values1_0, expand_dims_131, concat_181_values3_0))[name = string("concat_181")]; + tensor k_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = k_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = k_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_9_stride_0, update = linear_65_cast_fp16, x = coreml_update_state_38)[name = string("k_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_9_cast_fp16, input = k_cache1)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_40 = read_state(input = k_cache1)[name = string("coreml_update_state_40")]; + tensor v_cache1_internal_tensor_assign_9_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_9_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_180, begin_mask = v_cache1_internal_tensor_assign_9_begin_mask_0, end = concat_181, end_mask = v_cache1_internal_tensor_assign_9_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_9_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_9_stride_0, update = linear_66_cast_fp16, x = coreml_update_state_39)[name = string("v_cache1_internal_tensor_assign_9_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_9_cast_fp16, input = v_cache1)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_41 = read_state(input = v_cache1)[name = string("coreml_update_state_41")]; + int32 concat_186_values0_0 = const()[name = string("concat_186_values0_0"), val = int32(1)]; + int32 concat_186_values2_0 = const()[name = string("concat_186_values2_0"), val = int32(768)]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (concat_186_values0_0, end_step_19, concat_186_values2_0))[name = string("concat_186")]; + tensor var_1844_begin_0 = const()[name = string("op_1844_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1844_end_mask_0 = const()[name = string("op_1844_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1844_cast_fp16 = slice_by_index(begin = var_1844_begin_0, end = concat_186, end_mask = var_1844_end_mask_0, x = k_cache_33_cast_fp16)[name = string("op_1844_cast_fp16")]; + tensor var_1847_begin_0 = const()[name = string("op_1847_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1847_end_mask_0 = const()[name = string("op_1847_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = concat_186, end_mask = var_1847_end_mask_0, x = v_cache_33_cast_fp16)[name = string("op_1847_cast_fp16")]; + tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, -1, 12, 64])]; + tensor var_1857_cast_fp16 = reshape(shape = concat_188x, x = linear_64_cast_fp16)[name = string("op_1857_cast_fp16")]; + tensor const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_67_cast_fp16 = mul(x = var_1857_cast_fp16, y = const_92_to_fp16)[name = string("q_67_cast_fp16")]; + tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 12, 64])]; + tensor var_1864_cast_fp16 = reshape(shape = concat_189x, x = var_1844_cast_fp16)[name = string("op_1864_cast_fp16")]; + tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_85_cast_fp16 = mul(x = var_1864_cast_fp16, y = const_93_to_fp16)[name = string("k_85_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 12, 64])]; + tensor var_1871_cast_fp16 = reshape(shape = concat_190x, x = var_1847_cast_fp16)[name = string("op_1871_cast_fp16")]; + tensor var_1872 = const()[name = string("op_1872"), val = tensor([0, 2, 1, 3])]; + bool qk_49_transpose_x_0 = const()[name = string("qk_49_transpose_x_0"), val = bool(false)]; + bool qk_49_transpose_y_0 = const()[name = string("qk_49_transpose_y_0"), val = bool(false)]; + tensor transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = k_85_cast_fp16)[name = string("transpose_174")]; + tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = q_67_cast_fp16)[name = string("transpose_175")]; + tensor qk_49_cast_fp16 = matmul(transpose_x = qk_49_transpose_x_0, transpose_y = qk_49_transpose_y_0, x = transpose_129, y = transpose_130)[name = string("qk_49_cast_fp16")]; + int32 concat_191_values1_0 = const()[name = string("concat_191_values1_0"), val = int32(448)]; + int32 concat_191_axis_0 = const()[name = string("concat_191_axis_0"), val = int32(0)]; + bool concat_191_interleave_0 = const()[name = string("concat_191_interleave_0"), val = bool(false)]; + tensor concat_191 = concat(axis = concat_191_axis_0, interleave = concat_191_interleave_0, values = (gather_98_cast_uint16_to_int32, concat_191_values1_0))[name = string("concat_191")]; + tensor var_1875_begin_0 = const()[name = string("op_1875_begin_0"), val = tensor([0, 0])]; + tensor var_1875_end_mask_0 = const()[name = string("op_1875_end_mask_0"), val = tensor([false, true])]; + tensor var_1875_cast_fp16 = slice_by_index(begin = var_1875_begin_0, end = concat_191, end_mask = var_1875_end_mask_0, x = mask_to_fp16)[name = string("op_1875_cast_fp16")]; + int32 concat_192_values0_0 = const()[name = string("concat_192_values0_0"), val = int32(0)]; + int32 concat_192_axis_0 = const()[name = string("concat_192_axis_0"), val = int32(0)]; + bool concat_192_interleave_0 = const()[name = string("concat_192_interleave_0"), val = bool(false)]; + tensor concat_192 = concat(axis = concat_192_axis_0, interleave = concat_192_interleave_0, values = (concat_192_values0_0, gather_98_cast_uint16_to_int32))[name = string("concat_192")]; + tensor var_1876_begin_0 = const()[name = string("op_1876_begin_0"), val = tensor([0, 0])]; + tensor var_1876_end_mask_0 = const()[name = string("op_1876_end_mask_0"), val = tensor([true, false])]; + tensor var_1876_cast_fp16 = slice_by_index(begin = var_1876_begin_0, end = concat_192, end_mask = var_1876_end_mask_0, x = var_1875_cast_fp16)[name = string("op_1876_cast_fp16")]; + tensor qk_51_cast_fp16 = add(x = qk_49_cast_fp16, y = var_1876_cast_fp16)[name = string("qk_51_cast_fp16")]; + tensor var_1879_cast_fp16 = softmax(axis = var_1788, x = qk_51_cast_fp16)[name = string("op_1879_cast_fp16")]; + bool var_1881_transpose_x_0 = const()[name = string("op_1881_transpose_x_0"), val = bool(false)]; + bool var_1881_transpose_y_0 = const()[name = string("op_1881_transpose_y_0"), val = bool(false)]; + tensor v_85_cast_fp16 = transpose(perm = var_1872, x = var_1871_cast_fp16)[name = string("transpose_176")]; + tensor var_1881_cast_fp16 = matmul(transpose_x = var_1881_transpose_x_0, transpose_y = var_1881_transpose_y_0, x = var_1879_cast_fp16, y = v_85_cast_fp16)[name = string("op_1881_cast_fp16")]; + tensor var_1882 = const()[name = string("op_1882"), val = tensor([0, 2, 1, 3])]; + tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 768])]; + tensor var_1883_cast_fp16 = transpose(perm = var_1882, x = var_1881_cast_fp16)[name = string("transpose_173")]; + tensor x_151_cast_fp16 = reshape(shape = concat_193x, x = var_1883_cast_fp16)[name = string("x_151_cast_fp16")]; + tensor var_1887_to_fp16 = const()[name = string("op_1887_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218933568)))]; + tensor var_1888_to_fp16 = const()[name = string("op_1888_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220113280)))]; + tensor linear_67_cast_fp16 = linear(bias = var_1888_to_fp16, weight = var_1887_to_fp16, x = x_151_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor x_153_cast_fp16 = add(x = x_147_cast_fp16, y = linear_67_cast_fp16)[name = string("x_153_cast_fp16")]; + tensor var_1895_axes_0 = const()[name = string("op_1895_axes_0"), val = tensor([-1])]; + tensor blocks_8_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220114880)))]; + tensor blocks_8_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220116480)))]; + tensor var_1895_cast_fp16 = layer_norm(axes = var_1895_axes_0, beta = blocks_8_cross_attn_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_cross_attn_ln_weight_to_fp16, x = x_153_cast_fp16)[name = string("op_1895_cast_fp16")]; + tensor var_1904_to_fp16 = const()[name = string("op_1904_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220118080)))]; + tensor var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221297792)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1905_to_fp16, weight = var_1904_to_fp16, x = var_1895_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor concat_194 = const()[name = string("concat_194"), val = tensor([0, 0, 0])]; + tensor concat_195 = const()[name = string("concat_195"), val = tensor([0, 1500, 0])]; + tensor k_87_internal_tensor_assign_1_stride_0 = const()[name = string("k_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_194, begin_mask = k_87_internal_tensor_assign_1_begin_mask_0, end = concat_195, end_mask = k_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_87_internal_tensor_assign_1_squeeze_mask_0, stride = k_87_internal_tensor_assign_1_stride_0, update = k_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("k_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_196 = const()[name = string("concat_196"), val = tensor([0, 0, 0])]; + tensor concat_197 = const()[name = string("concat_197"), val = tensor([0, 1500, 0])]; + tensor v_87_internal_tensor_assign_1_stride_0 = const()[name = string("v_87_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_87_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_87_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_87_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_87_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_196, begin_mask = v_87_internal_tensor_assign_1_begin_mask_0, end = concat_197, end_mask = v_87_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_87_internal_tensor_assign_1_squeeze_mask_0, stride = v_87_internal_tensor_assign_1_stride_0, update = v_cache_35_cast_fp16, x = k_7_to_fp16)[name = string("v_87_internal_tensor_assign_1_cast_fp16")]; + tensor concat_198x = const()[name = string("concat_198x"), val = tensor([1, -1, 12, 64])]; + tensor var_1925_cast_fp16 = reshape(shape = concat_198x, x = linear_68_cast_fp16)[name = string("op_1925_cast_fp16")]; + tensor const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_71_cast_fp16 = mul(x = var_1925_cast_fp16, y = const_94_to_fp16)[name = string("q_71_cast_fp16")]; + tensor var_1931 = const()[name = string("op_1931"), val = tensor([1, 1500, 12, -1])]; + tensor var_1932_cast_fp16 = reshape(shape = var_1931, x = k_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1932_cast_fp16")]; + tensor const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_89_cast_fp16 = mul(x = var_1932_cast_fp16, y = const_95_to_fp16)[name = string("k_89_cast_fp16")]; + tensor var_1938 = const()[name = string("op_1938"), val = tensor([1, 1500, 12, -1])]; + tensor var_1939_cast_fp16 = reshape(shape = var_1938, x = v_87_internal_tensor_assign_1_cast_fp16)[name = string("op_1939_cast_fp16")]; + tensor var_1940 = const()[name = string("op_1940"), val = tensor([0, 2, 1, 3])]; + bool qk_53_transpose_x_0 = const()[name = string("qk_53_transpose_x_0"), val = bool(false)]; + bool qk_53_transpose_y_0 = const()[name = string("qk_53_transpose_y_0"), val = bool(false)]; + tensor transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = k_89_cast_fp16)[name = string("transpose_170")]; + tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = q_71_cast_fp16)[name = string("transpose_171")]; + tensor qk_53_cast_fp16 = matmul(transpose_x = qk_53_transpose_x_0, transpose_y = qk_53_transpose_y_0, x = transpose_131, y = transpose_132)[name = string("qk_53_cast_fp16")]; + tensor var_1944_cast_fp16 = softmax(axis = var_1788, x = qk_53_cast_fp16)[name = string("op_1944_cast_fp16")]; + bool var_1946_transpose_x_0 = const()[name = string("op_1946_transpose_x_0"), val = bool(false)]; + bool var_1946_transpose_y_0 = const()[name = string("op_1946_transpose_y_0"), val = bool(false)]; + tensor v_89_cast_fp16 = transpose(perm = var_1940, x = var_1939_cast_fp16)[name = string("transpose_172")]; + tensor var_1946_cast_fp16 = matmul(transpose_x = var_1946_transpose_x_0, transpose_y = var_1946_transpose_y_0, x = var_1944_cast_fp16, y = v_89_cast_fp16)[name = string("op_1946_cast_fp16")]; + tensor var_1947 = const()[name = string("op_1947"), val = tensor([0, 2, 1, 3])]; + tensor concat_199x = const()[name = string("concat_199x"), val = tensor([1, -1, 768])]; + tensor var_1948_cast_fp16 = transpose(perm = var_1947, x = var_1946_cast_fp16)[name = string("transpose_169")]; + tensor x_157_cast_fp16 = reshape(shape = concat_199x, x = var_1948_cast_fp16)[name = string("x_157_cast_fp16")]; + tensor var_1952_to_fp16 = const()[name = string("op_1952_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221299392)))]; + tensor var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222479104)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1953_to_fp16, weight = var_1952_to_fp16, x = x_157_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_159_cast_fp16 = add(x = x_153_cast_fp16, y = linear_69_cast_fp16)[name = string("x_159_cast_fp16")]; + tensor var_1960_axes_0 = const()[name = string("op_1960_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222480704)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222482304)))]; + tensor var_1960_cast_fp16 = layer_norm(axes = var_1960_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_1794_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_159_cast_fp16)[name = string("op_1960_cast_fp16")]; + tensor var_1969_to_fp16 = const()[name = string("op_1969_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222483904)))]; + tensor var_1970_to_fp16 = const()[name = string("op_1970_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227202560)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1970_to_fp16, weight = var_1969_to_fp16, x = var_1960_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_163_mode_0 = const()[name = string("x_163_mode_0"), val = string("EXACT")]; + tensor x_163_cast_fp16 = gelu(mode = x_163_mode_0, x = linear_70_cast_fp16)[name = string("x_163_cast_fp16")]; + tensor var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227208768)))]; + tensor var_1976_to_fp16 = const()[name = string("op_1976_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231927424)))]; + tensor linear_71_cast_fp16 = linear(bias = var_1976_to_fp16, weight = var_1975_to_fp16, x = x_163_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_165_cast_fp16 = add(x = x_159_cast_fp16, y = linear_71_cast_fp16)[name = string("x_165_cast_fp16")]; + tensor k_cache_37_begin_0 = const()[name = string("k_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_37_end_0 = const()[name = string("k_cache_37_end_0"), val = tensor([10, 1, 448, 768])]; + tensor k_cache_37_end_mask_0 = const()[name = string("k_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_37_squeeze_mask_0 = const()[name = string("k_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_37_cast_fp16 = slice_by_index(begin = k_cache_37_begin_0, end = k_cache_37_end_0, end_mask = k_cache_37_end_mask_0, squeeze_mask = k_cache_37_squeeze_mask_0, x = coreml_update_state_40)[name = string("k_cache_37_cast_fp16")]; + tensor v_cache_37_begin_0 = const()[name = string("v_cache_37_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_37_end_0 = const()[name = string("v_cache_37_end_0"), val = tensor([10, 1, 448, 768])]; + tensor v_cache_37_end_mask_0 = const()[name = string("v_cache_37_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_37_squeeze_mask_0 = const()[name = string("v_cache_37_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_37_cast_fp16 = slice_by_index(begin = v_cache_37_begin_0, end = v_cache_37_end_0, end_mask = v_cache_37_end_mask_0, squeeze_mask = v_cache_37_squeeze_mask_0, x = coreml_update_state_41)[name = string("v_cache_37_cast_fp16")]; + tensor k_cache_39_begin_0 = const()[name = string("k_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor k_cache_39_end_0 = const()[name = string("k_cache_39_end_0"), val = tensor([10, 1, 1500, 768])]; + tensor k_cache_39_end_mask_0 = const()[name = string("k_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_39_squeeze_mask_0 = const()[name = string("k_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_39_cast_fp16 = slice_by_index(begin = k_cache_39_begin_0, end = k_cache_39_end_0, end_mask = k_cache_39_end_mask_0, squeeze_mask = k_cache_39_squeeze_mask_0, x = read_state_2)[name = string("k_cache_39_cast_fp16")]; + tensor v_cache_39_begin_0 = const()[name = string("v_cache_39_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor v_cache_39_end_0 = const()[name = string("v_cache_39_end_0"), val = tensor([10, 1, 1500, 768])]; + tensor v_cache_39_end_mask_0 = const()[name = string("v_cache_39_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_39_squeeze_mask_0 = const()[name = string("v_cache_39_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_39_cast_fp16 = slice_by_index(begin = v_cache_39_begin_0, end = v_cache_39_end_0, end_mask = v_cache_39_end_mask_0, squeeze_mask = v_cache_39_squeeze_mask_0, x = read_state_3)[name = string("v_cache_39_cast_fp16")]; + int32 var_1999 = const()[name = string("op_1999"), val = int32(-1)]; + tensor var_2017_axes_0 = const()[name = string("op_2017_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231929024)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231930624)))]; + fp16 var_2005_to_fp16 = const()[name = string("op_2005_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2017_cast_fp16 = layer_norm(axes = var_2017_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_165_cast_fp16)[name = string("op_2017_cast_fp16")]; + tensor var_2028_to_fp16 = const()[name = string("op_2028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231932224)))]; + tensor var_2029_to_fp16 = const()[name = string("op_2029_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233111936)))]; + tensor linear_72_cast_fp16 = linear(bias = var_2029_to_fp16, weight = var_2028_to_fp16, x = var_2017_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor var_2032_to_fp16 = const()[name = string("op_2032_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233113536)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2032_to_fp16, x = var_2017_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor var_2036_to_fp16 = const()[name = string("op_2036_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234293248)))]; + tensor var_2037_to_fp16 = const()[name = string("op_2037_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235472960)))]; + tensor linear_74_cast_fp16 = linear(bias = var_2037_to_fp16, weight = var_2036_to_fp16, x = var_2017_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2039_shape_cast_fp16 = shape(x = linear_72_cast_fp16)[name = string("op_2039_shape_cast_fp16")]; + int32 gather_110_axis_0 = const()[name = string("gather_110_axis_0"), val = int32(0)]; + int32 gather_110_batch_dims_0 = const()[name = string("gather_110_batch_dims_0"), val = int32(0)]; + bool gather_110_validate_indices_0 = const()[name = string("gather_110_validate_indices_0"), val = bool(false)]; + string var_2039_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2039_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_110_to_uint16 = const()[name = string("select_110_to_uint16"), val = uint16(1)]; + tensor var_2039_shape_cast_fp16_to_uint16 = cast(dtype = var_2039_shape_cast_fp16_to_uint16_dtype_0, x = var_2039_shape_cast_fp16)[name = string("cast_132")]; + uint16 gather_110_cast_uint16 = gather(axis = gather_110_axis_0, batch_dims = gather_110_batch_dims_0, indices = select_110_to_uint16, validate_indices = gather_110_validate_indices_0, x = var_2039_shape_cast_fp16_to_uint16)[name = string("gather_110_cast_uint16")]; + string gather_110_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_110_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110_cast_uint16_to_int32 = cast(dtype = gather_110_cast_uint16_to_int32_dtype_0, x = gather_110_cast_uint16)[name = string("cast_131")]; + int32 end_step_21 = add(x = offset, y = gather_110_cast_uint16_to_int32)[name = string("end_step_21")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_146 = const()[name = string("expand_dims_146"), val = tensor([0])]; + tensor expand_dims_147_axes_0 = const()[name = string("expand_dims_147_axes_0"), val = tensor([0])]; + tensor expand_dims_147 = expand_dims(axes = expand_dims_147_axes_0, x = end_step_21)[name = string("expand_dims_147")]; + tensor concat_202_values0_0 = const()[name = string("concat_202_values0_0"), val = tensor([9])]; + int32 concat_202_axis_0 = const()[name = string("concat_202_axis_0"), val = int32(0)]; + bool concat_202_interleave_0 = const()[name = string("concat_202_interleave_0"), val = bool(false)]; + tensor concat_202 = concat(axis = concat_202_axis_0, interleave = concat_202_interleave_0, values = (concat_202_values0_0, expand_dims_144, expand_dims_1, expand_dims_146))[name = string("concat_202")]; + tensor concat_203_values0_0 = const()[name = string("concat_203_values0_0"), val = tensor([0])]; + tensor concat_203_values1_0 = const()[name = string("concat_203_values1_0"), val = tensor([0])]; + tensor concat_203_values3_0 = const()[name = string("concat_203_values3_0"), val = tensor([0])]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (concat_203_values0_0, concat_203_values1_0, expand_dims_147, concat_203_values3_0))[name = string("concat_203")]; + tensor k_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = k_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = k_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_10_stride_0, update = linear_73_cast_fp16, x = coreml_update_state_40)[name = string("k_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_10_cast_fp16, input = k_cache1)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_42 = read_state(input = k_cache1)[name = string("coreml_update_state_42")]; + tensor v_cache1_internal_tensor_assign_10_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_10_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_202, begin_mask = v_cache1_internal_tensor_assign_10_begin_mask_0, end = concat_203, end_mask = v_cache1_internal_tensor_assign_10_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_10_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_10_stride_0, update = linear_74_cast_fp16, x = coreml_update_state_41)[name = string("v_cache1_internal_tensor_assign_10_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_10_cast_fp16, input = v_cache1)[name = string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_43 = read_state(input = v_cache1)[name = string("coreml_update_state_43")]; + int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; + int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(768)]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, end_step_21, concat_208_values2_0))[name = string("concat_208")]; + tensor var_2055_begin_0 = const()[name = string("op_2055_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2055_end_mask_0 = const()[name = string("op_2055_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = concat_208, end_mask = var_2055_end_mask_0, x = k_cache_37_cast_fp16)[name = string("op_2055_cast_fp16")]; + tensor var_2058_begin_0 = const()[name = string("op_2058_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2058_end_mask_0 = const()[name = string("op_2058_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = concat_208, end_mask = var_2058_end_mask_0, x = v_cache_37_cast_fp16)[name = string("op_2058_cast_fp16")]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 12, 64])]; + tensor var_2068_cast_fp16 = reshape(shape = concat_210x, x = linear_72_cast_fp16)[name = string("op_2068_cast_fp16")]; + tensor const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_75_cast_fp16 = mul(x = var_2068_cast_fp16, y = const_96_to_fp16)[name = string("q_75_cast_fp16")]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 12, 64])]; + tensor var_2075_cast_fp16 = reshape(shape = concat_211x, x = var_2055_cast_fp16)[name = string("op_2075_cast_fp16")]; + tensor const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_95_cast_fp16 = mul(x = var_2075_cast_fp16, y = const_97_to_fp16)[name = string("k_95_cast_fp16")]; + tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 12, 64])]; + tensor var_2082_cast_fp16 = reshape(shape = concat_212x, x = var_2058_cast_fp16)[name = string("op_2082_cast_fp16")]; + tensor var_2083 = const()[name = string("op_2083"), val = tensor([0, 2, 1, 3])]; + bool qk_55_transpose_x_0 = const()[name = string("qk_55_transpose_x_0"), val = bool(false)]; + bool qk_55_transpose_y_0 = const()[name = string("qk_55_transpose_y_0"), val = bool(false)]; + tensor transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = k_95_cast_fp16)[name = string("transpose_166")]; + tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = q_75_cast_fp16)[name = string("transpose_167")]; + tensor qk_55_cast_fp16 = matmul(transpose_x = qk_55_transpose_x_0, transpose_y = qk_55_transpose_y_0, x = transpose_133, y = transpose_134)[name = string("qk_55_cast_fp16")]; + int32 concat_213_values1_0 = const()[name = string("concat_213_values1_0"), val = int32(448)]; + int32 concat_213_axis_0 = const()[name = string("concat_213_axis_0"), val = int32(0)]; + bool concat_213_interleave_0 = const()[name = string("concat_213_interleave_0"), val = bool(false)]; + tensor concat_213 = concat(axis = concat_213_axis_0, interleave = concat_213_interleave_0, values = (gather_110_cast_uint16_to_int32, concat_213_values1_0))[name = string("concat_213")]; + tensor var_2086_begin_0 = const()[name = string("op_2086_begin_0"), val = tensor([0, 0])]; + tensor var_2086_end_mask_0 = const()[name = string("op_2086_end_mask_0"), val = tensor([false, true])]; + tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = concat_213, end_mask = var_2086_end_mask_0, x = mask_to_fp16)[name = string("op_2086_cast_fp16")]; + int32 concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = int32(0)]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, gather_110_cast_uint16_to_int32))[name = string("concat_214")]; + tensor var_2087_begin_0 = const()[name = string("op_2087_begin_0"), val = tensor([0, 0])]; + tensor var_2087_end_mask_0 = const()[name = string("op_2087_end_mask_0"), val = tensor([true, false])]; + tensor var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = concat_214, end_mask = var_2087_end_mask_0, x = var_2086_cast_fp16)[name = string("op_2087_cast_fp16")]; + tensor qk_57_cast_fp16 = add(x = qk_55_cast_fp16, y = var_2087_cast_fp16)[name = string("qk_57_cast_fp16")]; + tensor var_2090_cast_fp16 = softmax(axis = var_1999, x = qk_57_cast_fp16)[name = string("op_2090_cast_fp16")]; + bool var_2092_transpose_x_0 = const()[name = string("op_2092_transpose_x_0"), val = bool(false)]; + bool var_2092_transpose_y_0 = const()[name = string("op_2092_transpose_y_0"), val = bool(false)]; + tensor v_95_cast_fp16 = transpose(perm = var_2083, x = var_2082_cast_fp16)[name = string("transpose_168")]; + tensor var_2092_cast_fp16 = matmul(transpose_x = var_2092_transpose_x_0, transpose_y = var_2092_transpose_y_0, x = var_2090_cast_fp16, y = v_95_cast_fp16)[name = string("op_2092_cast_fp16")]; + tensor var_2093 = const()[name = string("op_2093"), val = tensor([0, 2, 1, 3])]; + tensor concat_215x = const()[name = string("concat_215x"), val = tensor([1, -1, 768])]; + tensor var_2094_cast_fp16 = transpose(perm = var_2093, x = var_2092_cast_fp16)[name = string("transpose_165")]; + tensor x_169_cast_fp16 = reshape(shape = concat_215x, x = var_2094_cast_fp16)[name = string("x_169_cast_fp16")]; + tensor var_2098_to_fp16 = const()[name = string("op_2098_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235474560)))]; + tensor var_2099_to_fp16 = const()[name = string("op_2099_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236654272)))]; + tensor linear_75_cast_fp16 = linear(bias = var_2099_to_fp16, weight = var_2098_to_fp16, x = x_169_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor x_171_cast_fp16 = add(x = x_165_cast_fp16, y = linear_75_cast_fp16)[name = string("x_171_cast_fp16")]; + tensor var_2106_axes_0 = const()[name = string("op_2106_axes_0"), val = tensor([-1])]; + tensor blocks_9_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236655872)))]; + tensor blocks_9_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236657472)))]; + tensor var_2106_cast_fp16 = layer_norm(axes = var_2106_axes_0, beta = blocks_9_cross_attn_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_cross_attn_ln_weight_to_fp16, x = x_171_cast_fp16)[name = string("op_2106_cast_fp16")]; + tensor var_2115_to_fp16 = const()[name = string("op_2115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236659072)))]; + tensor var_2116_to_fp16 = const()[name = string("op_2116_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237838784)))]; + tensor linear_76_cast_fp16 = linear(bias = var_2116_to_fp16, weight = var_2115_to_fp16, x = var_2106_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor concat_216 = const()[name = string("concat_216"), val = tensor([0, 0, 0])]; + tensor concat_217 = const()[name = string("concat_217"), val = tensor([0, 1500, 0])]; + tensor k_97_internal_tensor_assign_1_stride_0 = const()[name = string("k_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_216, begin_mask = k_97_internal_tensor_assign_1_begin_mask_0, end = concat_217, end_mask = k_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_97_internal_tensor_assign_1_squeeze_mask_0, stride = k_97_internal_tensor_assign_1_stride_0, update = k_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("k_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_218 = const()[name = string("concat_218"), val = tensor([0, 0, 0])]; + tensor concat_219 = const()[name = string("concat_219"), val = tensor([0, 1500, 0])]; + tensor v_97_internal_tensor_assign_1_stride_0 = const()[name = string("v_97_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_97_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_97_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_97_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_97_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_218, begin_mask = v_97_internal_tensor_assign_1_begin_mask_0, end = concat_219, end_mask = v_97_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_97_internal_tensor_assign_1_squeeze_mask_0, stride = v_97_internal_tensor_assign_1_stride_0, update = v_cache_39_cast_fp16, x = k_7_to_fp16)[name = string("v_97_internal_tensor_assign_1_cast_fp16")]; + tensor concat_220x = const()[name = string("concat_220x"), val = tensor([1, -1, 12, 64])]; + tensor var_2136_cast_fp16 = reshape(shape = concat_220x, x = linear_76_cast_fp16)[name = string("op_2136_cast_fp16")]; + tensor const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_79_cast_fp16 = mul(x = var_2136_cast_fp16, y = const_98_to_fp16)[name = string("q_79_cast_fp16")]; + tensor var_2142 = const()[name = string("op_2142"), val = tensor([1, 1500, 12, -1])]; + tensor var_2143_cast_fp16 = reshape(shape = var_2142, x = k_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2143_cast_fp16")]; + tensor const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_99_cast_fp16 = mul(x = var_2143_cast_fp16, y = const_99_to_fp16)[name = string("k_99_cast_fp16")]; + tensor var_2149 = const()[name = string("op_2149"), val = tensor([1, 1500, 12, -1])]; + tensor var_2150_cast_fp16 = reshape(shape = var_2149, x = v_97_internal_tensor_assign_1_cast_fp16)[name = string("op_2150_cast_fp16")]; + tensor var_2151 = const()[name = string("op_2151"), val = tensor([0, 2, 1, 3])]; + bool qk_59_transpose_x_0 = const()[name = string("qk_59_transpose_x_0"), val = bool(false)]; + bool qk_59_transpose_y_0 = const()[name = string("qk_59_transpose_y_0"), val = bool(false)]; + tensor transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = k_99_cast_fp16)[name = string("transpose_162")]; + tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = q_79_cast_fp16)[name = string("transpose_163")]; + tensor qk_59_cast_fp16 = matmul(transpose_x = qk_59_transpose_x_0, transpose_y = qk_59_transpose_y_0, x = transpose_135, y = transpose_136)[name = string("qk_59_cast_fp16")]; + tensor var_2155_cast_fp16 = softmax(axis = var_1999, x = qk_59_cast_fp16)[name = string("op_2155_cast_fp16")]; + bool var_2157_transpose_x_0 = const()[name = string("op_2157_transpose_x_0"), val = bool(false)]; + bool var_2157_transpose_y_0 = const()[name = string("op_2157_transpose_y_0"), val = bool(false)]; + tensor v_99_cast_fp16 = transpose(perm = var_2151, x = var_2150_cast_fp16)[name = string("transpose_164")]; + tensor var_2157_cast_fp16 = matmul(transpose_x = var_2157_transpose_x_0, transpose_y = var_2157_transpose_y_0, x = var_2155_cast_fp16, y = v_99_cast_fp16)[name = string("op_2157_cast_fp16")]; + tensor var_2158 = const()[name = string("op_2158"), val = tensor([0, 2, 1, 3])]; + tensor concat_221x = const()[name = string("concat_221x"), val = tensor([1, -1, 768])]; + tensor var_2159_cast_fp16 = transpose(perm = var_2158, x = var_2157_cast_fp16)[name = string("transpose_161")]; + tensor x_175_cast_fp16 = reshape(shape = concat_221x, x = var_2159_cast_fp16)[name = string("x_175_cast_fp16")]; + tensor var_2163_to_fp16 = const()[name = string("op_2163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237840384)))]; + tensor var_2164_to_fp16 = const()[name = string("op_2164_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239020096)))]; + tensor linear_77_cast_fp16 = linear(bias = var_2164_to_fp16, weight = var_2163_to_fp16, x = x_175_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor x_177_cast_fp16 = add(x = x_171_cast_fp16, y = linear_77_cast_fp16)[name = string("x_177_cast_fp16")]; + tensor var_2171_axes_0 = const()[name = string("op_2171_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239021696)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239023296)))]; + tensor var_2171_cast_fp16 = layer_norm(axes = var_2171_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_2005_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_177_cast_fp16)[name = string("op_2171_cast_fp16")]; + tensor var_2180_to_fp16 = const()[name = string("op_2180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239024896)))]; + tensor var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243743552)))]; + tensor linear_78_cast_fp16 = linear(bias = var_2181_to_fp16, weight = var_2180_to_fp16, x = var_2171_cast_fp16)[name = string("linear_78_cast_fp16")]; + string x_181_mode_0 = const()[name = string("x_181_mode_0"), val = string("EXACT")]; + tensor x_181_cast_fp16 = gelu(mode = x_181_mode_0, x = linear_78_cast_fp16)[name = string("x_181_cast_fp16")]; + tensor var_2186_to_fp16 = const()[name = string("op_2186_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(243749760)))]; + tensor var_2187_to_fp16 = const()[name = string("op_2187_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248468416)))]; + tensor linear_79_cast_fp16 = linear(bias = var_2187_to_fp16, weight = var_2186_to_fp16, x = x_181_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor x_183_cast_fp16 = add(x = x_177_cast_fp16, y = linear_79_cast_fp16)[name = string("x_183_cast_fp16")]; + tensor k_cache_41_begin_0 = const()[name = string("k_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_41_end_0 = const()[name = string("k_cache_41_end_0"), val = tensor([11, 1, 448, 768])]; + tensor k_cache_41_end_mask_0 = const()[name = string("k_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_41_squeeze_mask_0 = const()[name = string("k_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_41_cast_fp16 = slice_by_index(begin = k_cache_41_begin_0, end = k_cache_41_end_0, end_mask = k_cache_41_end_mask_0, squeeze_mask = k_cache_41_squeeze_mask_0, x = coreml_update_state_42)[name = string("k_cache_41_cast_fp16")]; + tensor v_cache_41_begin_0 = const()[name = string("v_cache_41_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_41_end_0 = const()[name = string("v_cache_41_end_0"), val = tensor([11, 1, 448, 768])]; + tensor v_cache_41_end_mask_0 = const()[name = string("v_cache_41_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_41_squeeze_mask_0 = const()[name = string("v_cache_41_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_41_cast_fp16 = slice_by_index(begin = v_cache_41_begin_0, end = v_cache_41_end_0, end_mask = v_cache_41_end_mask_0, squeeze_mask = v_cache_41_squeeze_mask_0, x = coreml_update_state_43)[name = string("v_cache_41_cast_fp16")]; + tensor k_cache_43_begin_0 = const()[name = string("k_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor k_cache_43_end_0 = const()[name = string("k_cache_43_end_0"), val = tensor([11, 1, 1500, 768])]; + tensor k_cache_43_end_mask_0 = const()[name = string("k_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_43_squeeze_mask_0 = const()[name = string("k_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_43_cast_fp16 = slice_by_index(begin = k_cache_43_begin_0, end = k_cache_43_end_0, end_mask = k_cache_43_end_mask_0, squeeze_mask = k_cache_43_squeeze_mask_0, x = read_state_2)[name = string("k_cache_43_cast_fp16")]; + tensor v_cache_43_begin_0 = const()[name = string("v_cache_43_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor v_cache_43_end_0 = const()[name = string("v_cache_43_end_0"), val = tensor([11, 1, 1500, 768])]; + tensor v_cache_43_end_mask_0 = const()[name = string("v_cache_43_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_43_squeeze_mask_0 = const()[name = string("v_cache_43_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_43_cast_fp16 = slice_by_index(begin = v_cache_43_begin_0, end = v_cache_43_end_0, end_mask = v_cache_43_end_mask_0, squeeze_mask = v_cache_43_squeeze_mask_0, x = read_state_3)[name = string("v_cache_43_cast_fp16")]; + int32 var_2210 = const()[name = string("op_2210"), val = int32(-1)]; + tensor var_2228_axes_0 = const()[name = string("op_2228_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248470016)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248471616)))]; + fp16 var_2216_to_fp16 = const()[name = string("op_2216_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2228_cast_fp16 = layer_norm(axes = var_2228_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_183_cast_fp16)[name = string("op_2228_cast_fp16")]; + tensor var_2239_to_fp16 = const()[name = string("op_2239_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248473216)))]; + tensor var_2240_to_fp16 = const()[name = string("op_2240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249652928)))]; + tensor linear_80_cast_fp16 = linear(bias = var_2240_to_fp16, weight = var_2239_to_fp16, x = var_2228_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor var_2243_to_fp16 = const()[name = string("op_2243_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249654528)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2243_to_fp16, x = var_2228_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2247_to_fp16 = const()[name = string("op_2247_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250834240)))]; + tensor var_2248_to_fp16 = const()[name = string("op_2248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252013952)))]; + tensor linear_82_cast_fp16 = linear(bias = var_2248_to_fp16, weight = var_2247_to_fp16, x = var_2228_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor var_2250_shape_cast_fp16 = shape(x = linear_80_cast_fp16)[name = string("op_2250_shape_cast_fp16")]; + int32 gather_122_axis_0 = const()[name = string("gather_122_axis_0"), val = int32(0)]; + int32 gather_122_batch_dims_0 = const()[name = string("gather_122_batch_dims_0"), val = int32(0)]; + bool gather_122_validate_indices_0 = const()[name = string("gather_122_validate_indices_0"), val = bool(false)]; + string var_2250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_122_to_uint16 = const()[name = string("select_122_to_uint16"), val = uint16(1)]; + tensor var_2250_shape_cast_fp16_to_uint16 = cast(dtype = var_2250_shape_cast_fp16_to_uint16_dtype_0, x = var_2250_shape_cast_fp16)[name = string("cast_130")]; + uint16 gather_122_cast_uint16 = gather(axis = gather_122_axis_0, batch_dims = gather_122_batch_dims_0, indices = select_122_to_uint16, validate_indices = gather_122_validate_indices_0, x = var_2250_shape_cast_fp16_to_uint16)[name = string("gather_122_cast_uint16")]; + string gather_122_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_122_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_122_cast_uint16_to_int32 = cast(dtype = gather_122_cast_uint16_to_int32_dtype_0, x = gather_122_cast_uint16)[name = string("cast_129")]; + int32 end_step_23 = add(x = offset, y = gather_122_cast_uint16_to_int32)[name = string("end_step_23")]; + tensor expand_dims_160 = const()[name = string("expand_dims_160"), val = tensor([0])]; + tensor expand_dims_162 = const()[name = string("expand_dims_162"), val = tensor([0])]; + tensor expand_dims_163_axes_0 = const()[name = string("expand_dims_163_axes_0"), val = tensor([0])]; + tensor expand_dims_163 = expand_dims(axes = expand_dims_163_axes_0, x = end_step_23)[name = string("expand_dims_163")]; + tensor concat_224_values0_0 = const()[name = string("concat_224_values0_0"), val = tensor([10])]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (concat_224_values0_0, expand_dims_160, expand_dims_1, expand_dims_162))[name = string("concat_224")]; + tensor concat_225_values0_0 = const()[name = string("concat_225_values0_0"), val = tensor([0])]; + tensor concat_225_values1_0 = const()[name = string("concat_225_values1_0"), val = tensor([0])]; + tensor concat_225_values3_0 = const()[name = string("concat_225_values3_0"), val = tensor([0])]; + int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; + bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; + tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (concat_225_values0_0, concat_225_values1_0, expand_dims_163, concat_225_values3_0))[name = string("concat_225")]; + tensor k_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = k_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = k_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_11_stride_0, update = linear_81_cast_fp16, x = coreml_update_state_42)[name = string("k_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_11_cast_fp16, input = k_cache1)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_44 = read_state(input = k_cache1)[name = string("coreml_update_state_44")]; + tensor v_cache1_internal_tensor_assign_11_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_11_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_224, begin_mask = v_cache1_internal_tensor_assign_11_begin_mask_0, end = concat_225, end_mask = v_cache1_internal_tensor_assign_11_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_11_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_11_stride_0, update = linear_82_cast_fp16, x = coreml_update_state_43)[name = string("v_cache1_internal_tensor_assign_11_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_11_cast_fp16, input = v_cache1)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_45 = read_state(input = v_cache1)[name = string("coreml_update_state_45")]; + int32 concat_230_values0_0 = const()[name = string("concat_230_values0_0"), val = int32(1)]; + int32 concat_230_values2_0 = const()[name = string("concat_230_values2_0"), val = int32(768)]; + int32 concat_230_axis_0 = const()[name = string("concat_230_axis_0"), val = int32(0)]; + bool concat_230_interleave_0 = const()[name = string("concat_230_interleave_0"), val = bool(false)]; + tensor concat_230 = concat(axis = concat_230_axis_0, interleave = concat_230_interleave_0, values = (concat_230_values0_0, end_step_23, concat_230_values2_0))[name = string("concat_230")]; + tensor var_2266_begin_0 = const()[name = string("op_2266_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2266_end_mask_0 = const()[name = string("op_2266_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2266_cast_fp16 = slice_by_index(begin = var_2266_begin_0, end = concat_230, end_mask = var_2266_end_mask_0, x = k_cache_41_cast_fp16)[name = string("op_2266_cast_fp16")]; + tensor var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = concat_230, end_mask = var_2269_end_mask_0, x = v_cache_41_cast_fp16)[name = string("op_2269_cast_fp16")]; + tensor concat_232x = const()[name = string("concat_232x"), val = tensor([1, -1, 12, 64])]; + tensor var_2279_cast_fp16 = reshape(shape = concat_232x, x = linear_80_cast_fp16)[name = string("op_2279_cast_fp16")]; + tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_83_cast_fp16 = mul(x = var_2279_cast_fp16, y = const_100_to_fp16)[name = string("q_83_cast_fp16")]; + tensor concat_233x = const()[name = string("concat_233x"), val = tensor([1, -1, 12, 64])]; + tensor var_2286_cast_fp16 = reshape(shape = concat_233x, x = var_2266_cast_fp16)[name = string("op_2286_cast_fp16")]; + tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_105_cast_fp16 = mul(x = var_2286_cast_fp16, y = const_101_to_fp16)[name = string("k_105_cast_fp16")]; + tensor concat_234x = const()[name = string("concat_234x"), val = tensor([1, -1, 12, 64])]; + tensor var_2293_cast_fp16 = reshape(shape = concat_234x, x = var_2269_cast_fp16)[name = string("op_2293_cast_fp16")]; + tensor var_2294 = const()[name = string("op_2294"), val = tensor([0, 2, 1, 3])]; + bool qk_61_transpose_x_0 = const()[name = string("qk_61_transpose_x_0"), val = bool(false)]; + bool qk_61_transpose_y_0 = const()[name = string("qk_61_transpose_y_0"), val = bool(false)]; + tensor transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = k_105_cast_fp16)[name = string("transpose_158")]; + tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = q_83_cast_fp16)[name = string("transpose_159")]; + tensor qk_61_cast_fp16 = matmul(transpose_x = qk_61_transpose_x_0, transpose_y = qk_61_transpose_y_0, x = transpose_137, y = transpose_138)[name = string("qk_61_cast_fp16")]; + int32 concat_235_values1_0 = const()[name = string("concat_235_values1_0"), val = int32(448)]; + int32 concat_235_axis_0 = const()[name = string("concat_235_axis_0"), val = int32(0)]; + bool concat_235_interleave_0 = const()[name = string("concat_235_interleave_0"), val = bool(false)]; + tensor concat_235 = concat(axis = concat_235_axis_0, interleave = concat_235_interleave_0, values = (gather_122_cast_uint16_to_int32, concat_235_values1_0))[name = string("concat_235")]; + tensor var_2297_begin_0 = const()[name = string("op_2297_begin_0"), val = tensor([0, 0])]; + tensor var_2297_end_mask_0 = const()[name = string("op_2297_end_mask_0"), val = tensor([false, true])]; + tensor var_2297_cast_fp16 = slice_by_index(begin = var_2297_begin_0, end = concat_235, end_mask = var_2297_end_mask_0, x = mask_to_fp16)[name = string("op_2297_cast_fp16")]; + int32 concat_236_values0_0 = const()[name = string("concat_236_values0_0"), val = int32(0)]; + int32 concat_236_axis_0 = const()[name = string("concat_236_axis_0"), val = int32(0)]; + bool concat_236_interleave_0 = const()[name = string("concat_236_interleave_0"), val = bool(false)]; + tensor concat_236 = concat(axis = concat_236_axis_0, interleave = concat_236_interleave_0, values = (concat_236_values0_0, gather_122_cast_uint16_to_int32))[name = string("concat_236")]; + tensor var_2298_begin_0 = const()[name = string("op_2298_begin_0"), val = tensor([0, 0])]; + tensor var_2298_end_mask_0 = const()[name = string("op_2298_end_mask_0"), val = tensor([true, false])]; + tensor var_2298_cast_fp16 = slice_by_index(begin = var_2298_begin_0, end = concat_236, end_mask = var_2298_end_mask_0, x = var_2297_cast_fp16)[name = string("op_2298_cast_fp16")]; + tensor qk_63_cast_fp16 = add(x = qk_61_cast_fp16, y = var_2298_cast_fp16)[name = string("qk_63_cast_fp16")]; + tensor var_2301_cast_fp16 = softmax(axis = var_2210, x = qk_63_cast_fp16)[name = string("op_2301_cast_fp16")]; + bool var_2303_transpose_x_0 = const()[name = string("op_2303_transpose_x_0"), val = bool(false)]; + bool var_2303_transpose_y_0 = const()[name = string("op_2303_transpose_y_0"), val = bool(false)]; + tensor v_105_cast_fp16 = transpose(perm = var_2294, x = var_2293_cast_fp16)[name = string("transpose_160")]; + tensor var_2303_cast_fp16 = matmul(transpose_x = var_2303_transpose_x_0, transpose_y = var_2303_transpose_y_0, x = var_2301_cast_fp16, y = v_105_cast_fp16)[name = string("op_2303_cast_fp16")]; + tensor var_2304 = const()[name = string("op_2304"), val = tensor([0, 2, 1, 3])]; + tensor concat_237x = const()[name = string("concat_237x"), val = tensor([1, -1, 768])]; + tensor var_2305_cast_fp16 = transpose(perm = var_2304, x = var_2303_cast_fp16)[name = string("transpose_157")]; + tensor x_187_cast_fp16 = reshape(shape = concat_237x, x = var_2305_cast_fp16)[name = string("x_187_cast_fp16")]; + tensor var_2309_to_fp16 = const()[name = string("op_2309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252015552)))]; + tensor var_2310_to_fp16 = const()[name = string("op_2310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253195264)))]; + tensor linear_83_cast_fp16 = linear(bias = var_2310_to_fp16, weight = var_2309_to_fp16, x = x_187_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor x_189_cast_fp16 = add(x = x_183_cast_fp16, y = linear_83_cast_fp16)[name = string("x_189_cast_fp16")]; + tensor var_2317_axes_0 = const()[name = string("op_2317_axes_0"), val = tensor([-1])]; + tensor blocks_10_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253196864)))]; + tensor blocks_10_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253198464)))]; + tensor var_2317_cast_fp16 = layer_norm(axes = var_2317_axes_0, beta = blocks_10_cross_attn_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_cross_attn_ln_weight_to_fp16, x = x_189_cast_fp16)[name = string("op_2317_cast_fp16")]; + tensor var_2326_to_fp16 = const()[name = string("op_2326_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253200064)))]; + tensor var_2327_to_fp16 = const()[name = string("op_2327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254379776)))]; + tensor linear_84_cast_fp16 = linear(bias = var_2327_to_fp16, weight = var_2326_to_fp16, x = var_2317_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor concat_238 = const()[name = string("concat_238"), val = tensor([0, 0, 0])]; + tensor concat_239 = const()[name = string("concat_239"), val = tensor([0, 1500, 0])]; + tensor k_107_internal_tensor_assign_1_stride_0 = const()[name = string("k_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_238, begin_mask = k_107_internal_tensor_assign_1_begin_mask_0, end = concat_239, end_mask = k_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_107_internal_tensor_assign_1_squeeze_mask_0, stride = k_107_internal_tensor_assign_1_stride_0, update = k_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("k_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_240 = const()[name = string("concat_240"), val = tensor([0, 0, 0])]; + tensor concat_241 = const()[name = string("concat_241"), val = tensor([0, 1500, 0])]; + tensor v_107_internal_tensor_assign_1_stride_0 = const()[name = string("v_107_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_107_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_107_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_107_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_107_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_240, begin_mask = v_107_internal_tensor_assign_1_begin_mask_0, end = concat_241, end_mask = v_107_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_107_internal_tensor_assign_1_squeeze_mask_0, stride = v_107_internal_tensor_assign_1_stride_0, update = v_cache_43_cast_fp16, x = k_7_to_fp16)[name = string("v_107_internal_tensor_assign_1_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, -1, 12, 64])]; + tensor var_2347_cast_fp16 = reshape(shape = concat_242x, x = linear_84_cast_fp16)[name = string("op_2347_cast_fp16")]; + tensor const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_87_cast_fp16 = mul(x = var_2347_cast_fp16, y = const_102_to_fp16)[name = string("q_87_cast_fp16")]; + tensor var_2353 = const()[name = string("op_2353"), val = tensor([1, 1500, 12, -1])]; + tensor var_2354_cast_fp16 = reshape(shape = var_2353, x = k_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2354_cast_fp16")]; + tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_109_cast_fp16 = mul(x = var_2354_cast_fp16, y = const_103_to_fp16)[name = string("k_109_cast_fp16")]; + tensor var_2360 = const()[name = string("op_2360"), val = tensor([1, 1500, 12, -1])]; + tensor var_2361_cast_fp16 = reshape(shape = var_2360, x = v_107_internal_tensor_assign_1_cast_fp16)[name = string("op_2361_cast_fp16")]; + tensor var_2362 = const()[name = string("op_2362"), val = tensor([0, 2, 1, 3])]; + bool qk_65_transpose_x_0 = const()[name = string("qk_65_transpose_x_0"), val = bool(false)]; + bool qk_65_transpose_y_0 = const()[name = string("qk_65_transpose_y_0"), val = bool(false)]; + tensor transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = k_109_cast_fp16)[name = string("transpose_154")]; + tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = q_87_cast_fp16)[name = string("transpose_155")]; + tensor qk_65_cast_fp16 = matmul(transpose_x = qk_65_transpose_x_0, transpose_y = qk_65_transpose_y_0, x = transpose_139, y = transpose_140)[name = string("qk_65_cast_fp16")]; + tensor var_2366_cast_fp16 = softmax(axis = var_2210, x = qk_65_cast_fp16)[name = string("op_2366_cast_fp16")]; + bool var_2368_transpose_x_0 = const()[name = string("op_2368_transpose_x_0"), val = bool(false)]; + bool var_2368_transpose_y_0 = const()[name = string("op_2368_transpose_y_0"), val = bool(false)]; + tensor v_109_cast_fp16 = transpose(perm = var_2362, x = var_2361_cast_fp16)[name = string("transpose_156")]; + tensor var_2368_cast_fp16 = matmul(transpose_x = var_2368_transpose_x_0, transpose_y = var_2368_transpose_y_0, x = var_2366_cast_fp16, y = v_109_cast_fp16)[name = string("op_2368_cast_fp16")]; + tensor var_2369 = const()[name = string("op_2369"), val = tensor([0, 2, 1, 3])]; + tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, -1, 768])]; + tensor var_2370_cast_fp16 = transpose(perm = var_2369, x = var_2368_cast_fp16)[name = string("transpose_153")]; + tensor x_193_cast_fp16 = reshape(shape = concat_243x, x = var_2370_cast_fp16)[name = string("x_193_cast_fp16")]; + tensor var_2374_to_fp16 = const()[name = string("op_2374_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254381376)))]; + tensor var_2375_to_fp16 = const()[name = string("op_2375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255561088)))]; + tensor linear_85_cast_fp16 = linear(bias = var_2375_to_fp16, weight = var_2374_to_fp16, x = x_193_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor x_195_cast_fp16 = add(x = x_189_cast_fp16, y = linear_85_cast_fp16)[name = string("x_195_cast_fp16")]; + tensor var_2382_axes_0 = const()[name = string("op_2382_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255562688)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255564288)))]; + tensor var_2382_cast_fp16 = layer_norm(axes = var_2382_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_2216_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_195_cast_fp16)[name = string("op_2382_cast_fp16")]; + tensor var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(255565888)))]; + tensor var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260284544)))]; + tensor linear_86_cast_fp16 = linear(bias = var_2392_to_fp16, weight = var_2391_to_fp16, x = var_2382_cast_fp16)[name = string("linear_86_cast_fp16")]; + string x_199_mode_0 = const()[name = string("x_199_mode_0"), val = string("EXACT")]; + tensor x_199_cast_fp16 = gelu(mode = x_199_mode_0, x = linear_86_cast_fp16)[name = string("x_199_cast_fp16")]; + tensor var_2397_to_fp16 = const()[name = string("op_2397_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260290752)))]; + tensor var_2398_to_fp16 = const()[name = string("op_2398_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265009408)))]; + tensor linear_87_cast_fp16 = linear(bias = var_2398_to_fp16, weight = var_2397_to_fp16, x = x_199_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor x_201_cast_fp16 = add(x = x_195_cast_fp16, y = linear_87_cast_fp16)[name = string("x_201_cast_fp16")]; + tensor k_cache_45_begin_0 = const()[name = string("k_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_45_end_0 = const()[name = string("k_cache_45_end_0"), val = tensor([12, 1, 448, 768])]; + tensor k_cache_45_end_mask_0 = const()[name = string("k_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_45_squeeze_mask_0 = const()[name = string("k_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_45_cast_fp16 = slice_by_index(begin = k_cache_45_begin_0, end = k_cache_45_end_0, end_mask = k_cache_45_end_mask_0, squeeze_mask = k_cache_45_squeeze_mask_0, x = coreml_update_state_44)[name = string("k_cache_45_cast_fp16")]; + tensor v_cache_45_begin_0 = const()[name = string("v_cache_45_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_45_end_0 = const()[name = string("v_cache_45_end_0"), val = tensor([12, 1, 448, 768])]; + tensor v_cache_45_end_mask_0 = const()[name = string("v_cache_45_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_45_squeeze_mask_0 = const()[name = string("v_cache_45_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_45_cast_fp16 = slice_by_index(begin = v_cache_45_begin_0, end = v_cache_45_end_0, end_mask = v_cache_45_end_mask_0, squeeze_mask = v_cache_45_squeeze_mask_0, x = coreml_update_state_45)[name = string("v_cache_45_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([12, 1, 1500, 768])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([12, 1, 1500, 768])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_2421 = const()[name = string("op_2421"), val = int32(-1)]; + tensor var_2439_axes_0 = const()[name = string("op_2439_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265011008)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265012608)))]; + fp16 var_2427_to_fp16 = const()[name = string("op_2427_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2439_cast_fp16 = layer_norm(axes = var_2439_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_201_cast_fp16)[name = string("op_2439_cast_fp16")]; + tensor var_2450_to_fp16 = const()[name = string("op_2450_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265014208)))]; + tensor var_2451_to_fp16 = const()[name = string("op_2451_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266193920)))]; + tensor linear_88_cast_fp16 = linear(bias = var_2451_to_fp16, weight = var_2450_to_fp16, x = var_2439_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2454_to_fp16 = const()[name = string("op_2454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266195520)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_2454_to_fp16, x = var_2439_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor var_2458_to_fp16 = const()[name = string("op_2458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267375232)))]; + tensor var_2459_to_fp16 = const()[name = string("op_2459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268554944)))]; + tensor linear_90_cast_fp16 = linear(bias = var_2459_to_fp16, weight = var_2458_to_fp16, x = var_2439_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor var_2461_shape_cast_fp16 = shape(x = linear_88_cast_fp16)[name = string("op_2461_shape_cast_fp16")]; + int32 gather_134_axis_0 = const()[name = string("gather_134_axis_0"), val = int32(0)]; + int32 gather_134_batch_dims_0 = const()[name = string("gather_134_batch_dims_0"), val = int32(0)]; + bool gather_134_validate_indices_0 = const()[name = string("gather_134_validate_indices_0"), val = bool(false)]; + string var_2461_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2461_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_134_to_uint16 = const()[name = string("select_134_to_uint16"), val = uint16(1)]; + tensor var_2461_shape_cast_fp16_to_uint16 = cast(dtype = var_2461_shape_cast_fp16_to_uint16_dtype_0, x = var_2461_shape_cast_fp16)[name = string("cast_128")]; + uint16 gather_134_cast_uint16 = gather(axis = gather_134_axis_0, batch_dims = gather_134_batch_dims_0, indices = select_134_to_uint16, validate_indices = gather_134_validate_indices_0, x = var_2461_shape_cast_fp16_to_uint16)[name = string("gather_134_cast_uint16")]; + string gather_134_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_134_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_134_cast_uint16_to_int32 = cast(dtype = gather_134_cast_uint16_to_int32_dtype_0, x = gather_134_cast_uint16)[name = string("cast_127")]; + int32 end_step = add(x = offset, y = gather_134_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_176 = const()[name = string("expand_dims_176"), val = tensor([0])]; + tensor expand_dims_178 = const()[name = string("expand_dims_178"), val = tensor([0])]; + tensor expand_dims_179_axes_0 = const()[name = string("expand_dims_179_axes_0"), val = tensor([0])]; + tensor expand_dims_179 = expand_dims(axes = expand_dims_179_axes_0, x = end_step)[name = string("expand_dims_179")]; + tensor concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = tensor([11])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, expand_dims_176, expand_dims_1, expand_dims_178))[name = string("concat_246")]; + tensor concat_247_values0_0 = const()[name = string("concat_247_values0_0"), val = tensor([0])]; + tensor concat_247_values1_0 = const()[name = string("concat_247_values1_0"), val = tensor([0])]; + tensor concat_247_values3_0 = const()[name = string("concat_247_values3_0"), val = tensor([0])]; + int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; + bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; + tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (concat_247_values0_0, concat_247_values1_0, expand_dims_179, concat_247_values3_0))[name = string("concat_247")]; + tensor k_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = k_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = k_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_12_stride_0, update = linear_89_cast_fp16, x = coreml_update_state_44)[name = string("k_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_12_cast_fp16, input = k_cache1)[name = string("coreml_update_state_46_write_state")]; + tensor v_cache1_internal_tensor_assign_12_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_12_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_246, begin_mask = v_cache1_internal_tensor_assign_12_begin_mask_0, end = concat_247, end_mask = v_cache1_internal_tensor_assign_12_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_12_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_12_stride_0, update = linear_90_cast_fp16, x = coreml_update_state_45)[name = string("v_cache1_internal_tensor_assign_12_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_12_cast_fp16, input = v_cache1)[name = string("coreml_update_state_47_write_state")]; + int32 concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = int32(1)]; + int32 concat_252_values2_0 = const()[name = string("concat_252_values2_0"), val = int32(768)]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, end_step, concat_252_values2_0))[name = string("concat_252")]; + tensor var_2477_begin_0 = const()[name = string("op_2477_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2477_end_mask_0 = const()[name = string("op_2477_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2477_cast_fp16 = slice_by_index(begin = var_2477_begin_0, end = concat_252, end_mask = var_2477_end_mask_0, x = k_cache_45_cast_fp16)[name = string("op_2477_cast_fp16")]; + tensor var_2480_begin_0 = const()[name = string("op_2480_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2480_end_mask_0 = const()[name = string("op_2480_end_mask_0"), val = tensor([true, false, true])]; + tensor var_2480_cast_fp16 = slice_by_index(begin = var_2480_begin_0, end = concat_252, end_mask = var_2480_end_mask_0, x = v_cache_45_cast_fp16)[name = string("op_2480_cast_fp16")]; + tensor concat_254x = const()[name = string("concat_254x"), val = tensor([1, -1, 12, 64])]; + tensor var_2490_cast_fp16 = reshape(shape = concat_254x, x = linear_88_cast_fp16)[name = string("op_2490_cast_fp16")]; + tensor const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_91_cast_fp16 = mul(x = var_2490_cast_fp16, y = const_104_to_fp16)[name = string("q_91_cast_fp16")]; + tensor concat_255x = const()[name = string("concat_255x"), val = tensor([1, -1, 12, 64])]; + tensor var_2497_cast_fp16 = reshape(shape = concat_255x, x = var_2477_cast_fp16)[name = string("op_2497_cast_fp16")]; + tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_115_cast_fp16 = mul(x = var_2497_cast_fp16, y = const_105_to_fp16)[name = string("k_115_cast_fp16")]; + tensor concat_256x = const()[name = string("concat_256x"), val = tensor([1, -1, 12, 64])]; + tensor var_2504_cast_fp16 = reshape(shape = concat_256x, x = var_2480_cast_fp16)[name = string("op_2504_cast_fp16")]; + tensor var_2505 = const()[name = string("op_2505"), val = tensor([0, 2, 1, 3])]; + bool qk_67_transpose_x_0 = const()[name = string("qk_67_transpose_x_0"), val = bool(false)]; + bool qk_67_transpose_y_0 = const()[name = string("qk_67_transpose_y_0"), val = bool(false)]; + tensor transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = k_115_cast_fp16)[name = string("transpose_150")]; + tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = q_91_cast_fp16)[name = string("transpose_151")]; + tensor qk_67_cast_fp16 = matmul(transpose_x = qk_67_transpose_x_0, transpose_y = qk_67_transpose_y_0, x = transpose_141, y = transpose_142)[name = string("qk_67_cast_fp16")]; + int32 concat_257_values1_0 = const()[name = string("concat_257_values1_0"), val = int32(448)]; + int32 concat_257_axis_0 = const()[name = string("concat_257_axis_0"), val = int32(0)]; + bool concat_257_interleave_0 = const()[name = string("concat_257_interleave_0"), val = bool(false)]; + tensor concat_257 = concat(axis = concat_257_axis_0, interleave = concat_257_interleave_0, values = (gather_134_cast_uint16_to_int32, concat_257_values1_0))[name = string("concat_257")]; + tensor var_2508_begin_0 = const()[name = string("op_2508_begin_0"), val = tensor([0, 0])]; + tensor var_2508_end_mask_0 = const()[name = string("op_2508_end_mask_0"), val = tensor([false, true])]; + tensor var_2508_cast_fp16 = slice_by_index(begin = var_2508_begin_0, end = concat_257, end_mask = var_2508_end_mask_0, x = mask_to_fp16)[name = string("op_2508_cast_fp16")]; + int32 concat_258_values0_0 = const()[name = string("concat_258_values0_0"), val = int32(0)]; + int32 concat_258_axis_0 = const()[name = string("concat_258_axis_0"), val = int32(0)]; + bool concat_258_interleave_0 = const()[name = string("concat_258_interleave_0"), val = bool(false)]; + tensor concat_258 = concat(axis = concat_258_axis_0, interleave = concat_258_interleave_0, values = (concat_258_values0_0, gather_134_cast_uint16_to_int32))[name = string("concat_258")]; + tensor var_2509_begin_0 = const()[name = string("op_2509_begin_0"), val = tensor([0, 0])]; + tensor var_2509_end_mask_0 = const()[name = string("op_2509_end_mask_0"), val = tensor([true, false])]; + tensor var_2509_cast_fp16 = slice_by_index(begin = var_2509_begin_0, end = concat_258, end_mask = var_2509_end_mask_0, x = var_2508_cast_fp16)[name = string("op_2509_cast_fp16")]; + tensor qk_69_cast_fp16 = add(x = qk_67_cast_fp16, y = var_2509_cast_fp16)[name = string("qk_69_cast_fp16")]; + tensor var_2512_cast_fp16 = softmax(axis = var_2421, x = qk_69_cast_fp16)[name = string("op_2512_cast_fp16")]; + bool var_2514_transpose_x_0 = const()[name = string("op_2514_transpose_x_0"), val = bool(false)]; + bool var_2514_transpose_y_0 = const()[name = string("op_2514_transpose_y_0"), val = bool(false)]; + tensor v_115_cast_fp16 = transpose(perm = var_2505, x = var_2504_cast_fp16)[name = string("transpose_152")]; + tensor var_2514_cast_fp16 = matmul(transpose_x = var_2514_transpose_x_0, transpose_y = var_2514_transpose_y_0, x = var_2512_cast_fp16, y = v_115_cast_fp16)[name = string("op_2514_cast_fp16")]; + tensor var_2515 = const()[name = string("op_2515"), val = tensor([0, 2, 1, 3])]; + tensor concat_259x = const()[name = string("concat_259x"), val = tensor([1, -1, 768])]; + tensor var_2516_cast_fp16 = transpose(perm = var_2515, x = var_2514_cast_fp16)[name = string("transpose_149")]; + tensor x_205_cast_fp16 = reshape(shape = concat_259x, x = var_2516_cast_fp16)[name = string("x_205_cast_fp16")]; + tensor var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(268556544)))]; + tensor var_2521_to_fp16 = const()[name = string("op_2521_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269736256)))]; + tensor linear_91_cast_fp16 = linear(bias = var_2521_to_fp16, weight = var_2520_to_fp16, x = x_205_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_91_cast_fp16)[name = string("x_207_cast_fp16")]; + tensor var_2528_axes_0 = const()[name = string("op_2528_axes_0"), val = tensor([-1])]; + tensor blocks_11_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269737856)))]; + tensor blocks_11_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269739456)))]; + tensor var_2528_cast_fp16 = layer_norm(axes = var_2528_axes_0, beta = blocks_11_cross_attn_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_cross_attn_ln_weight_to_fp16, x = x_207_cast_fp16)[name = string("op_2528_cast_fp16")]; + tensor var_2537_to_fp16 = const()[name = string("op_2537_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269741056)))]; + tensor var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270920768)))]; + tensor linear_92_cast_fp16 = linear(bias = var_2538_to_fp16, weight = var_2537_to_fp16, x = var_2528_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor concat_260 = const()[name = string("concat_260"), val = tensor([0, 0, 0])]; + tensor concat_261 = const()[name = string("concat_261"), val = tensor([0, 1500, 0])]; + tensor k_117_internal_tensor_assign_1_stride_0 = const()[name = string("k_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_260, begin_mask = k_117_internal_tensor_assign_1_begin_mask_0, end = concat_261, end_mask = k_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_117_internal_tensor_assign_1_squeeze_mask_0, stride = k_117_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_262 = const()[name = string("concat_262"), val = tensor([0, 0, 0])]; + tensor concat_263 = const()[name = string("concat_263"), val = tensor([0, 1500, 0])]; + tensor v_117_internal_tensor_assign_1_stride_0 = const()[name = string("v_117_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_117_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_117_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_117_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_117_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_262, begin_mask = v_117_internal_tensor_assign_1_begin_mask_0, end = concat_263, end_mask = v_117_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_117_internal_tensor_assign_1_squeeze_mask_0, stride = v_117_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_117_internal_tensor_assign_1_cast_fp16")]; + tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, -1, 12, 64])]; + tensor var_2558_cast_fp16 = reshape(shape = concat_264x, x = linear_92_cast_fp16)[name = string("op_2558_cast_fp16")]; + tensor const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_2558_cast_fp16, y = const_106_to_fp16)[name = string("q_cast_fp16")]; + tensor var_2564 = const()[name = string("op_2564"), val = tensor([1, 1500, 12, -1])]; + tensor var_2565_cast_fp16 = reshape(shape = var_2564, x = k_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2565_cast_fp16")]; + tensor const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_2565_cast_fp16, y = const_107_to_fp16)[name = string("k_cast_fp16")]; + tensor var_2571 = const()[name = string("op_2571"), val = tensor([1, 1500, 12, -1])]; + tensor var_2572_cast_fp16 = reshape(shape = var_2571, x = v_117_internal_tensor_assign_1_cast_fp16)[name = string("op_2572_cast_fp16")]; + tensor var_2573 = const()[name = string("op_2573"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_144_perm_0 = const()[name = string("transpose_144_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_144 = transpose(perm = transpose_144_perm_0, x = k_cast_fp16)[name = string("transpose_146")]; + tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = q_cast_fp16)[name = string("transpose_147")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_143, y = transpose_144)[name = string("qk_cast_fp16")]; + tensor var_2577_cast_fp16 = softmax(axis = var_2421, x = qk_cast_fp16)[name = string("op_2577_cast_fp16")]; + bool var_2579_transpose_x_0 = const()[name = string("op_2579_transpose_x_0"), val = bool(false)]; + bool var_2579_transpose_y_0 = const()[name = string("op_2579_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_2573, x = var_2572_cast_fp16)[name = string("transpose_148")]; + tensor var_2579_cast_fp16 = matmul(transpose_x = var_2579_transpose_x_0, transpose_y = var_2579_transpose_y_0, x = var_2577_cast_fp16, y = v_cast_fp16)[name = string("op_2579_cast_fp16")]; + tensor var_2580 = const()[name = string("op_2580"), val = tensor([0, 2, 1, 3])]; + tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 768])]; + tensor var_2581_cast_fp16 = transpose(perm = var_2580, x = var_2579_cast_fp16)[name = string("transpose_145")]; + tensor x_211_cast_fp16 = reshape(shape = concat_265x, x = var_2581_cast_fp16)[name = string("x_211_cast_fp16")]; + tensor var_2585_to_fp16 = const()[name = string("op_2585_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270922368)))]; + tensor var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272102080)))]; + tensor linear_93_cast_fp16 = linear(bias = var_2586_to_fp16, weight = var_2585_to_fp16, x = x_211_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor x_213_cast_fp16 = add(x = x_207_cast_fp16, y = linear_93_cast_fp16)[name = string("x_213_cast_fp16")]; + tensor var_2593_axes_0 = const()[name = string("op_2593_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272103680)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272105280)))]; + tensor var_2593_cast_fp16 = layer_norm(axes = var_2593_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_2427_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_213_cast_fp16)[name = string("op_2593_cast_fp16")]; + tensor var_2602_to_fp16 = const()[name = string("op_2602_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272106880)))]; + tensor var_2603_to_fp16 = const()[name = string("op_2603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276825536)))]; + tensor linear_94_cast_fp16 = linear(bias = var_2603_to_fp16, weight = var_2602_to_fp16, x = var_2593_cast_fp16)[name = string("linear_94_cast_fp16")]; + string x_217_mode_0 = const()[name = string("x_217_mode_0"), val = string("EXACT")]; + tensor x_217_cast_fp16 = gelu(mode = x_217_mode_0, x = linear_94_cast_fp16)[name = string("x_217_cast_fp16")]; + tensor var_2608_to_fp16 = const()[name = string("op_2608_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276831744)))]; + tensor var_2609_to_fp16 = const()[name = string("op_2609_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281550400)))]; + tensor linear_95_cast_fp16 = linear(bias = var_2609_to_fp16, weight = var_2608_to_fp16, x = x_217_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor x_219_cast_fp16 = add(x = x_213_cast_fp16, y = linear_95_cast_fp16)[name = string("x_219_cast_fp16")]; + tensor var_2622_axes_0 = const()[name = string("op_2622_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281552000)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281553600)))]; + fp16 var_2613_to_fp16 = const()[name = string("op_2613_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2622_cast_fp16 = layer_norm(axes = var_2622_axes_0, beta = ln_bias_to_fp16, epsilon = var_2613_to_fp16, gamma = ln_weight_to_fp16, x = x_219_cast_fp16)[name = string("op_2622_cast_fp16")]; + tensor var_2632_bias_0_to_fp16 = const()[name = string("op_2632_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(281555200)))]; + tensor logits = linear(bias = var_2632_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_2622_cast_fp16)[name = string("op_2632_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/small/decoder_second.mlmodelc/weights/weight.bin b/small/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..90ca561c70632e65de0842117756ea468a571c60 --- /dev/null +++ b/small/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3337ba112e828dc28889bc3f1f5a2dedddd2f25e867247ab569aa2ff8df7f4f +size 281658994 diff --git a/small/encoder.mlmodelc/analytics/coremldata.bin b/small/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..f31b754669e1303c2372c9342aab1b63bd7cab93 --- /dev/null +++ b/small/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a641e986ed9ea8170754f3317c8f0b565bc752fd791d159468a3f6a62fe1757 +size 243 diff --git a/small/encoder.mlmodelc/coremldata.bin b/small/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..e06088a447412c3ca2fa17a18981aa7cb7a92bc7 --- /dev/null +++ b/small/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268793fee4ed1a8616dc360502d6cafd32d4074c9a50651d775d02cc62fbc1a3 +size 318 diff --git a/small/encoder.mlmodelc/metadata.json b/small/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d4d4ea5de53e1dd5f75562828a806b102bcbd520 --- /dev/null +++ b/small/encoder.mlmodelc/metadata.json @@ -0,0 +1,69 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 768)", + "shortDescription" : "", + "shape" : "[1, 1500, 768]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.mul" : 24, + "Ios18.softmax" : 12, + "Ios18.linear" : 72, + "Ios18.gelu" : 14, + "Ios18.layerNorm" : 25, + "Ios18.transpose" : 49, + "Ios18.matmul" : 24, + "Ios18.conv" : 2, + "Ios18.add" : 25, + "Ios18.reshape" : 48 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "encoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/small/encoder.mlmodelc/model.mil b/small/encoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..410fe63661e7fb031279aa0c89684ec2cc4eb33f --- /dev/null +++ b/small/encoder.mlmodelc/model.mil @@ -0,0 +1,732 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor logmel_data) { + string var_44_pad_type_0 = const()[name = string("op_44_pad_type_0"), val = string("custom")]; + tensor var_44_pad_0 = const()[name = string("op_44_pad_0"), val = tensor([1, 1])]; + tensor var_44_strides_0 = const()[name = string("op_44_strides_0"), val = tensor([1])]; + tensor var_44_dilations_0 = const()[name = string("op_44_dilations_0"), val = tensor([1])]; + int32 var_44_groups_0 = const()[name = string("op_44_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))]; + tensor var_44_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_44_dilations_0, groups = var_44_groups_0, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_44_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_44_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_44_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_62_pad_type_0 = const()[name = string("op_62_pad_type_0"), val = string("custom")]; + tensor var_62_pad_0 = const()[name = string("op_62_pad_0"), val = tensor([1, 1])]; + tensor var_62_strides_0 = const()[name = string("op_62_strides_0"), val = tensor([2])]; + tensor var_62_dilations_0 = const()[name = string("op_62_dilations_0"), val = tensor([1])]; + int32 var_62_groups_0 = const()[name = string("op_62_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3909376)))]; + tensor var_62_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_62_dilations_0, groups = var_62_groups_0, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_62_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_62_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_62_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_68 = const()[name = string("op_68"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3910976)))]; + tensor x_5_cast_fp16 = transpose(perm = var_68, x = x_3_cast_fp16)[name = string("transpose_120")]; + tensor var_71_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_71_cast_fp16")]; + int32 var_84 = const()[name = string("op_84"), val = int32(-1)]; + tensor var_100_axes_0 = const()[name = string("op_100_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6215040)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6216640)))]; + fp16 var_90_to_fp16 = const()[name = string("op_90_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_100_cast_fp16 = layer_norm(axes = var_100_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_71_cast_fp16)[name = string("op_100_cast_fp16")]; + tensor var_111_to_fp16 = const()[name = string("op_111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6218240)))]; + tensor var_112_to_fp16 = const()[name = string("op_112_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7397952)))]; + tensor linear_0_cast_fp16 = linear(bias = var_112_to_fp16, weight = var_111_to_fp16, x = var_100_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_115_to_fp16 = const()[name = string("op_115_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7399552)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8579264)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_115_to_fp16, x = var_100_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8580864)))]; + tensor var_120_to_fp16 = const()[name = string("op_120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9760576)))]; + tensor linear_2_cast_fp16 = linear(bias = var_120_to_fp16, weight = var_119_to_fp16, x = var_100_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_128 = const()[name = string("op_128"), val = tensor([1, 1500, 12, -1])]; + tensor var_129_cast_fp16 = reshape(shape = var_128, x = linear_0_cast_fp16)[name = string("op_129_cast_fp16")]; + tensor const_84_to_fp16 = const()[name = string("const_84_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_129_cast_fp16, y = const_84_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_135 = const()[name = string("op_135"), val = tensor([1, 1500, 12, -1])]; + tensor var_136_cast_fp16 = reshape(shape = var_135, x = linear_1_cast_fp16)[name = string("op_136_cast_fp16")]; + tensor const_85_to_fp16 = const()[name = string("const_85_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_85_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_142 = const()[name = string("op_142"), val = tensor([1, 1500, 12, -1])]; + tensor var_143_cast_fp16 = reshape(shape = var_142, x = linear_2_cast_fp16)[name = string("op_143_cast_fp16")]; + tensor var_144 = const()[name = string("op_144"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = k_3_cast_fp16)[name = string("transpose_117")]; + tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = q_3_cast_fp16)[name = string("transpose_118")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_48, y = transpose_49)[name = string("qk_1_cast_fp16")]; + tensor var_148_cast_fp16 = softmax(axis = var_84, x = qk_1_cast_fp16)[name = string("op_148_cast_fp16")]; + bool var_150_transpose_x_0 = const()[name = string("op_150_transpose_x_0"), val = bool(false)]; + bool var_150_transpose_y_0 = const()[name = string("op_150_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_144, x = var_143_cast_fp16)[name = string("transpose_119")]; + tensor var_150_cast_fp16 = matmul(transpose_x = var_150_transpose_x_0, transpose_y = var_150_transpose_y_0, x = var_148_cast_fp16, y = v_3_cast_fp16)[name = string("op_150_cast_fp16")]; + tensor var_151 = const()[name = string("op_151"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 768])]; + tensor var_152_cast_fp16 = transpose(perm = var_151, x = var_150_cast_fp16)[name = string("transpose_116")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_152_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_156_to_fp16 = const()[name = string("op_156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9762176)))]; + tensor var_157_to_fp16 = const()[name = string("op_157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10941888)))]; + tensor linear_3_cast_fp16 = linear(bias = var_157_to_fp16, weight = var_156_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_71_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_164_axes_0 = const()[name = string("op_164_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10943488)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10945088)))]; + tensor var_164_cast_fp16 = layer_norm(axes = var_164_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_164_cast_fp16")]; + tensor var_173_to_fp16 = const()[name = string("op_173_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10946688)))]; + tensor var_174_to_fp16 = const()[name = string("op_174_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15665344)))]; + tensor linear_4_cast_fp16 = linear(bias = var_174_to_fp16, weight = var_173_to_fp16, x = var_164_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15671552)))]; + tensor var_180_to_fp16 = const()[name = string("op_180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20390208)))]; + tensor linear_5_cast_fp16 = linear(bias = var_180_to_fp16, weight = var_179_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_190 = const()[name = string("op_190"), val = int32(-1)]; + tensor var_206_axes_0 = const()[name = string("op_206_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20391808)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20393408)))]; + fp16 var_196_to_fp16 = const()[name = string("op_196_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_206_cast_fp16 = layer_norm(axes = var_206_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_206_cast_fp16")]; + tensor var_217_to_fp16 = const()[name = string("op_217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20395008)))]; + tensor var_218_to_fp16 = const()[name = string("op_218_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21574720)))]; + tensor linear_6_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_206_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_221_to_fp16 = const()[name = string("op_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21576320)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_221_to_fp16, x = var_206_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_225_to_fp16 = const()[name = string("op_225_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22756032)))]; + tensor var_226_to_fp16 = const()[name = string("op_226_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23935744)))]; + tensor linear_8_cast_fp16 = linear(bias = var_226_to_fp16, weight = var_225_to_fp16, x = var_206_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_234 = const()[name = string("op_234"), val = tensor([1, 1500, 12, -1])]; + tensor var_235_cast_fp16 = reshape(shape = var_234, x = linear_6_cast_fp16)[name = string("op_235_cast_fp16")]; + tensor const_86_to_fp16 = const()[name = string("const_86_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_235_cast_fp16, y = const_86_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_241 = const()[name = string("op_241"), val = tensor([1, 1500, 12, -1])]; + tensor var_242_cast_fp16 = reshape(shape = var_241, x = linear_7_cast_fp16)[name = string("op_242_cast_fp16")]; + tensor const_87_to_fp16 = const()[name = string("const_87_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_242_cast_fp16, y = const_87_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_248 = const()[name = string("op_248"), val = tensor([1, 1500, 12, -1])]; + tensor var_249_cast_fp16 = reshape(shape = var_248, x = linear_8_cast_fp16)[name = string("op_249_cast_fp16")]; + tensor var_250 = const()[name = string("op_250"), val = tensor([0, 2, 1, 3])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_50_perm_0 = const()[name = string("transpose_50_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_51_perm_0 = const()[name = string("transpose_51_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = k_7_cast_fp16)[name = string("transpose_113")]; + tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = q_7_cast_fp16)[name = string("transpose_114")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_50, y = transpose_51)[name = string("qk_3_cast_fp16")]; + tensor var_254_cast_fp16 = softmax(axis = var_190, x = qk_3_cast_fp16)[name = string("op_254_cast_fp16")]; + bool var_256_transpose_x_0 = const()[name = string("op_256_transpose_x_0"), val = bool(false)]; + bool var_256_transpose_y_0 = const()[name = string("op_256_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_250, x = var_249_cast_fp16)[name = string("transpose_115")]; + tensor var_256_cast_fp16 = matmul(transpose_x = var_256_transpose_x_0, transpose_y = var_256_transpose_y_0, x = var_254_cast_fp16, y = v_7_cast_fp16)[name = string("op_256_cast_fp16")]; + tensor var_257 = const()[name = string("op_257"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 768])]; + tensor var_258_cast_fp16 = transpose(perm = var_257, x = var_256_cast_fp16)[name = string("transpose_112")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_258_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_262_to_fp16 = const()[name = string("op_262_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23937344)))]; + tensor var_263_to_fp16 = const()[name = string("op_263_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25117056)))]; + tensor linear_9_cast_fp16 = linear(bias = var_263_to_fp16, weight = var_262_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_270_axes_0 = const()[name = string("op_270_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25118656)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25120256)))]; + tensor var_270_cast_fp16 = layer_norm(axes = var_270_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_270_cast_fp16")]; + tensor var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25121856)))]; + tensor var_280_to_fp16 = const()[name = string("op_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29840512)))]; + tensor linear_10_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = var_270_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_285_to_fp16 = const()[name = string("op_285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29846720)))]; + tensor var_286_to_fp16 = const()[name = string("op_286_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34565376)))]; + tensor linear_11_cast_fp16 = linear(bias = var_286_to_fp16, weight = var_285_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_296 = const()[name = string("op_296"), val = int32(-1)]; + tensor var_312_axes_0 = const()[name = string("op_312_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34566976)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34568576)))]; + fp16 var_302_to_fp16 = const()[name = string("op_302_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_312_cast_fp16 = layer_norm(axes = var_312_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_312_cast_fp16")]; + tensor var_323_to_fp16 = const()[name = string("op_323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34570176)))]; + tensor var_324_to_fp16 = const()[name = string("op_324_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35749888)))]; + tensor linear_12_cast_fp16 = linear(bias = var_324_to_fp16, weight = var_323_to_fp16, x = var_312_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_327_to_fp16 = const()[name = string("op_327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35751488)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_327_to_fp16, x = var_312_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_331_to_fp16 = const()[name = string("op_331_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36931200)))]; + tensor var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38110912)))]; + tensor linear_14_cast_fp16 = linear(bias = var_332_to_fp16, weight = var_331_to_fp16, x = var_312_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_340 = const()[name = string("op_340"), val = tensor([1, 1500, 12, -1])]; + tensor var_341_cast_fp16 = reshape(shape = var_340, x = linear_12_cast_fp16)[name = string("op_341_cast_fp16")]; + tensor const_88_to_fp16 = const()[name = string("const_88_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_341_cast_fp16, y = const_88_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_347 = const()[name = string("op_347"), val = tensor([1, 1500, 12, -1])]; + tensor var_348_cast_fp16 = reshape(shape = var_347, x = linear_13_cast_fp16)[name = string("op_348_cast_fp16")]; + tensor const_89_to_fp16 = const()[name = string("const_89_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_348_cast_fp16, y = const_89_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_354 = const()[name = string("op_354"), val = tensor([1, 1500, 12, -1])]; + tensor var_355_cast_fp16 = reshape(shape = var_354, x = linear_14_cast_fp16)[name = string("op_355_cast_fp16")]; + tensor var_356 = const()[name = string("op_356"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_52_perm_0 = const()[name = string("transpose_52_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_53 = transpose(perm = transpose_53_perm_0, x = k_11_cast_fp16)[name = string("transpose_109")]; + tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = q_11_cast_fp16)[name = string("transpose_110")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_52, y = transpose_53)[name = string("qk_5_cast_fp16")]; + tensor var_360_cast_fp16 = softmax(axis = var_296, x = qk_5_cast_fp16)[name = string("op_360_cast_fp16")]; + bool var_362_transpose_x_0 = const()[name = string("op_362_transpose_x_0"), val = bool(false)]; + bool var_362_transpose_y_0 = const()[name = string("op_362_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_356, x = var_355_cast_fp16)[name = string("transpose_111")]; + tensor var_362_cast_fp16 = matmul(transpose_x = var_362_transpose_x_0, transpose_y = var_362_transpose_y_0, x = var_360_cast_fp16, y = v_11_cast_fp16)[name = string("op_362_cast_fp16")]; + tensor var_363 = const()[name = string("op_363"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 768])]; + tensor var_364_cast_fp16 = transpose(perm = var_363, x = var_362_cast_fp16)[name = string("transpose_108")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_364_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38112512)))]; + tensor var_369_to_fp16 = const()[name = string("op_369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39292224)))]; + tensor linear_15_cast_fp16 = linear(bias = var_369_to_fp16, weight = var_368_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_376_axes_0 = const()[name = string("op_376_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39293824)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39295424)))]; + tensor var_376_cast_fp16 = layer_norm(axes = var_376_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_376_cast_fp16")]; + tensor var_385_to_fp16 = const()[name = string("op_385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39297024)))]; + tensor var_386_to_fp16 = const()[name = string("op_386_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44015680)))]; + tensor linear_16_cast_fp16 = linear(bias = var_386_to_fp16, weight = var_385_to_fp16, x = var_376_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_391_to_fp16 = const()[name = string("op_391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44021888)))]; + tensor var_392_to_fp16 = const()[name = string("op_392_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48740544)))]; + tensor linear_17_cast_fp16 = linear(bias = var_392_to_fp16, weight = var_391_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_402 = const()[name = string("op_402"), val = int32(-1)]; + tensor var_418_axes_0 = const()[name = string("op_418_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742144)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48743744)))]; + fp16 var_408_to_fp16 = const()[name = string("op_408_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_418_cast_fp16")]; + tensor var_429_to_fp16 = const()[name = string("op_429_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48745344)))]; + tensor var_430_to_fp16 = const()[name = string("op_430_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49925056)))]; + tensor linear_18_cast_fp16 = linear(bias = var_430_to_fp16, weight = var_429_to_fp16, x = var_418_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_433_to_fp16 = const()[name = string("op_433_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49926656)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_433_to_fp16, x = var_418_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_437_to_fp16 = const()[name = string("op_437_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51106368)))]; + tensor var_438_to_fp16 = const()[name = string("op_438_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52286080)))]; + tensor linear_20_cast_fp16 = linear(bias = var_438_to_fp16, weight = var_437_to_fp16, x = var_418_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_446 = const()[name = string("op_446"), val = tensor([1, 1500, 12, -1])]; + tensor var_447_cast_fp16 = reshape(shape = var_446, x = linear_18_cast_fp16)[name = string("op_447_cast_fp16")]; + tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_447_cast_fp16, y = const_90_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_453 = const()[name = string("op_453"), val = tensor([1, 1500, 12, -1])]; + tensor var_454_cast_fp16 = reshape(shape = var_453, x = linear_19_cast_fp16)[name = string("op_454_cast_fp16")]; + tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_454_cast_fp16, y = const_91_to_fp16)[name = string("k_15_cast_fp16")]; + tensor var_460 = const()[name = string("op_460"), val = tensor([1, 1500, 12, -1])]; + tensor var_461_cast_fp16 = reshape(shape = var_460, x = linear_20_cast_fp16)[name = string("op_461_cast_fp16")]; + tensor var_462 = const()[name = string("op_462"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_54_perm_0 = const()[name = string("transpose_54_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_55_perm_0 = const()[name = string("transpose_55_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = k_15_cast_fp16)[name = string("transpose_105")]; + tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = q_15_cast_fp16)[name = string("transpose_106")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_54, y = transpose_55)[name = string("qk_7_cast_fp16")]; + tensor var_466_cast_fp16 = softmax(axis = var_402, x = qk_7_cast_fp16)[name = string("op_466_cast_fp16")]; + bool var_468_transpose_x_0 = const()[name = string("op_468_transpose_x_0"), val = bool(false)]; + bool var_468_transpose_y_0 = const()[name = string("op_468_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_462, x = var_461_cast_fp16)[name = string("transpose_107")]; + tensor var_468_cast_fp16 = matmul(transpose_x = var_468_transpose_x_0, transpose_y = var_468_transpose_y_0, x = var_466_cast_fp16, y = v_15_cast_fp16)[name = string("op_468_cast_fp16")]; + tensor var_469 = const()[name = string("op_469"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 768])]; + tensor var_470_cast_fp16 = transpose(perm = var_469, x = var_468_cast_fp16)[name = string("transpose_104")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_470_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_474_to_fp16 = const()[name = string("op_474_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52287680)))]; + tensor var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53467392)))]; + tensor linear_21_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_482_axes_0 = const()[name = string("op_482_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53468992)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53470592)))]; + tensor var_482_cast_fp16 = layer_norm(axes = var_482_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_482_cast_fp16")]; + tensor var_491_to_fp16 = const()[name = string("op_491_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53472192)))]; + tensor var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58190848)))]; + tensor linear_22_cast_fp16 = linear(bias = var_492_to_fp16, weight = var_491_to_fp16, x = var_482_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_497_to_fp16 = const()[name = string("op_497_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58197056)))]; + tensor var_498_to_fp16 = const()[name = string("op_498_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62915712)))]; + tensor linear_23_cast_fp16 = linear(bias = var_498_to_fp16, weight = var_497_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_55_cast_fp16")]; + int32 var_508 = const()[name = string("op_508"), val = int32(-1)]; + tensor var_524_axes_0 = const()[name = string("op_524_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = string("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62917312)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = string("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62918912)))]; + fp16 var_514_to_fp16 = const()[name = string("op_514_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_524_cast_fp16 = layer_norm(axes = var_524_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = string("op_524_cast_fp16")]; + tensor var_535_to_fp16 = const()[name = string("op_535_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62920512)))]; + tensor var_536_to_fp16 = const()[name = string("op_536_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64100224)))]; + tensor linear_24_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = var_524_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64101824)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_539_to_fp16, x = var_524_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_543_to_fp16 = const()[name = string("op_543_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65281536)))]; + tensor var_544_to_fp16 = const()[name = string("op_544_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66461248)))]; + tensor linear_26_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = var_524_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_552 = const()[name = string("op_552"), val = tensor([1, 1500, 12, -1])]; + tensor var_553_cast_fp16 = reshape(shape = var_552, x = linear_24_cast_fp16)[name = string("op_553_cast_fp16")]; + tensor const_92_to_fp16 = const()[name = string("const_92_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_553_cast_fp16, y = const_92_to_fp16)[name = string("q_19_cast_fp16")]; + tensor var_559 = const()[name = string("op_559"), val = tensor([1, 1500, 12, -1])]; + tensor var_560_cast_fp16 = reshape(shape = var_559, x = linear_25_cast_fp16)[name = string("op_560_cast_fp16")]; + tensor const_93_to_fp16 = const()[name = string("const_93_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_560_cast_fp16, y = const_93_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_566 = const()[name = string("op_566"), val = tensor([1, 1500, 12, -1])]; + tensor var_567_cast_fp16 = reshape(shape = var_566, x = linear_26_cast_fp16)[name = string("op_567_cast_fp16")]; + tensor var_568 = const()[name = string("op_568"), val = tensor([0, 2, 1, 3])]; + bool qk_9_transpose_x_0 = const()[name = string("qk_9_transpose_x_0"), val = bool(false)]; + bool qk_9_transpose_y_0 = const()[name = string("qk_9_transpose_y_0"), val = bool(false)]; + tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = k_19_cast_fp16)[name = string("transpose_101")]; + tensor transpose_56 = transpose(perm = transpose_56_perm_0, x = q_19_cast_fp16)[name = string("transpose_102")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_56, y = transpose_57)[name = string("qk_9_cast_fp16")]; + tensor var_572_cast_fp16 = softmax(axis = var_508, x = qk_9_cast_fp16)[name = string("op_572_cast_fp16")]; + bool var_574_transpose_x_0 = const()[name = string("op_574_transpose_x_0"), val = bool(false)]; + bool var_574_transpose_y_0 = const()[name = string("op_574_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_568, x = var_567_cast_fp16)[name = string("transpose_103")]; + tensor var_574_cast_fp16 = matmul(transpose_x = var_574_transpose_x_0, transpose_y = var_574_transpose_y_0, x = var_572_cast_fp16, y = v_19_cast_fp16)[name = string("op_574_cast_fp16")]; + tensor var_575 = const()[name = string("op_575"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = string("concat_4"), val = tensor([1, 1500, 768])]; + tensor var_576_cast_fp16 = transpose(perm = var_575, x = var_574_cast_fp16)[name = string("transpose_100")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = var_576_cast_fp16)[name = string("x_59_cast_fp16")]; + tensor var_580_to_fp16 = const()[name = string("op_580_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66462848)))]; + tensor var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67642560)))]; + tensor linear_27_cast_fp16 = linear(bias = var_581_to_fp16, weight = var_580_to_fp16, x = x_59_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_588_axes_0 = const()[name = string("op_588_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = string("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67644160)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = string("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67645760)))]; + tensor var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = string("op_588_cast_fp16")]; + tensor var_597_to_fp16 = const()[name = string("op_597_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67647360)))]; + tensor var_598_to_fp16 = const()[name = string("op_598_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72366016)))]; + tensor linear_28_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = var_588_cast_fp16)[name = string("linear_28_cast_fp16")]; + string x_65_mode_0 = const()[name = string("x_65_mode_0"), val = string("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = string("x_65_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72372224)))]; + tensor var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77090880)))]; + tensor linear_29_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = x_65_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = string("x_67_cast_fp16")]; + int32 var_614 = const()[name = string("op_614"), val = int32(-1)]; + tensor var_630_axes_0 = const()[name = string("op_630_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = string("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77092480)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = string("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77094080)))]; + fp16 var_620_to_fp16 = const()[name = string("op_620_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_630_cast_fp16 = layer_norm(axes = var_630_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = string("op_630_cast_fp16")]; + tensor var_641_to_fp16 = const()[name = string("op_641_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77095680)))]; + tensor var_642_to_fp16 = const()[name = string("op_642_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78275392)))]; + tensor linear_30_cast_fp16 = linear(bias = var_642_to_fp16, weight = var_641_to_fp16, x = var_630_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor var_645_to_fp16 = const()[name = string("op_645_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78276992)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_645_to_fp16, x = var_630_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor var_649_to_fp16 = const()[name = string("op_649_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79456704)))]; + tensor var_650_to_fp16 = const()[name = string("op_650_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80636416)))]; + tensor linear_32_cast_fp16 = linear(bias = var_650_to_fp16, weight = var_649_to_fp16, x = var_630_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_658 = const()[name = string("op_658"), val = tensor([1, 1500, 12, -1])]; + tensor var_659_cast_fp16 = reshape(shape = var_658, x = linear_30_cast_fp16)[name = string("op_659_cast_fp16")]; + tensor const_94_to_fp16 = const()[name = string("const_94_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_94_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_665 = const()[name = string("op_665"), val = tensor([1, 1500, 12, -1])]; + tensor var_666_cast_fp16 = reshape(shape = var_665, x = linear_31_cast_fp16)[name = string("op_666_cast_fp16")]; + tensor const_95_to_fp16 = const()[name = string("const_95_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_23_cast_fp16 = mul(x = var_666_cast_fp16, y = const_95_to_fp16)[name = string("k_23_cast_fp16")]; + tensor var_672 = const()[name = string("op_672"), val = tensor([1, 1500, 12, -1])]; + tensor var_673_cast_fp16 = reshape(shape = var_672, x = linear_32_cast_fp16)[name = string("op_673_cast_fp16")]; + tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_59 = transpose(perm = transpose_59_perm_0, x = k_23_cast_fp16)[name = string("transpose_97")]; + tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = q_23_cast_fp16)[name = string("transpose_98")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_58, y = transpose_59)[name = string("qk_11_cast_fp16")]; + tensor var_678_cast_fp16 = softmax(axis = var_614, x = qk_11_cast_fp16)[name = string("op_678_cast_fp16")]; + bool var_680_transpose_x_0 = const()[name = string("op_680_transpose_x_0"), val = bool(false)]; + bool var_680_transpose_y_0 = const()[name = string("op_680_transpose_y_0"), val = bool(false)]; + tensor v_23_cast_fp16 = transpose(perm = var_674, x = var_673_cast_fp16)[name = string("transpose_99")]; + tensor var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = v_23_cast_fp16)[name = string("op_680_cast_fp16")]; + tensor var_681 = const()[name = string("op_681"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([1, 1500, 768])]; + tensor var_682_cast_fp16 = transpose(perm = var_681, x = var_680_cast_fp16)[name = string("transpose_96")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = var_682_cast_fp16)[name = string("x_71_cast_fp16")]; + tensor var_686_to_fp16 = const()[name = string("op_686_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80638016)))]; + tensor var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81817728)))]; + tensor linear_33_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_71_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_694_axes_0 = const()[name = string("op_694_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = string("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81819328)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = string("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81820928)))]; + tensor var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = string("op_694_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81822528)))]; + tensor var_704_to_fp16 = const()[name = string("op_704_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86541184)))]; + tensor linear_34_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = string("linear_34_cast_fp16")]; + string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = string("x_77_cast_fp16")]; + tensor var_709_to_fp16 = const()[name = string("op_709_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86547392)))]; + tensor var_710_to_fp16 = const()[name = string("op_710_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91266048)))]; + tensor linear_35_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_77_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = string("x_79_cast_fp16")]; + int32 var_720 = const()[name = string("op_720"), val = int32(-1)]; + tensor var_736_axes_0 = const()[name = string("op_736_axes_0"), val = tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = string("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91267648)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = string("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91269248)))]; + fp16 var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_736_cast_fp16 = layer_norm(axes = var_736_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = string("op_736_cast_fp16")]; + tensor var_747_to_fp16 = const()[name = string("op_747_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91270848)))]; + tensor var_748_to_fp16 = const()[name = string("op_748_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92450560)))]; + tensor linear_36_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = var_736_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor var_751_to_fp16 = const()[name = string("op_751_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92452160)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_751_to_fp16, x = var_736_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor var_755_to_fp16 = const()[name = string("op_755_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93631872)))]; + tensor var_756_to_fp16 = const()[name = string("op_756_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94811584)))]; + tensor linear_38_cast_fp16 = linear(bias = var_756_to_fp16, weight = var_755_to_fp16, x = var_736_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor var_764 = const()[name = string("op_764"), val = tensor([1, 1500, 12, -1])]; + tensor var_765_cast_fp16 = reshape(shape = var_764, x = linear_36_cast_fp16)[name = string("op_765_cast_fp16")]; + tensor const_96_to_fp16 = const()[name = string("const_96_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_765_cast_fp16, y = const_96_to_fp16)[name = string("q_27_cast_fp16")]; + tensor var_771 = const()[name = string("op_771"), val = tensor([1, 1500, 12, -1])]; + tensor var_772_cast_fp16 = reshape(shape = var_771, x = linear_37_cast_fp16)[name = string("op_772_cast_fp16")]; + tensor const_97_to_fp16 = const()[name = string("const_97_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_27_cast_fp16 = mul(x = var_772_cast_fp16, y = const_97_to_fp16)[name = string("k_27_cast_fp16")]; + tensor var_778 = const()[name = string("op_778"), val = tensor([1, 1500, 12, -1])]; + tensor var_779_cast_fp16 = reshape(shape = var_778, x = linear_38_cast_fp16)[name = string("op_779_cast_fp16")]; + tensor var_780 = const()[name = string("op_780"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = k_27_cast_fp16)[name = string("transpose_93")]; + tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = q_27_cast_fp16)[name = string("transpose_94")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_60, y = transpose_61)[name = string("qk_13_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_720, x = qk_13_cast_fp16)[name = string("op_784_cast_fp16")]; + bool var_786_transpose_x_0 = const()[name = string("op_786_transpose_x_0"), val = bool(false)]; + bool var_786_transpose_y_0 = const()[name = string("op_786_transpose_y_0"), val = bool(false)]; + tensor v_27_cast_fp16 = transpose(perm = var_780, x = var_779_cast_fp16)[name = string("transpose_95")]; + tensor var_786_cast_fp16 = matmul(transpose_x = var_786_transpose_x_0, transpose_y = var_786_transpose_y_0, x = var_784_cast_fp16, y = v_27_cast_fp16)[name = string("op_786_cast_fp16")]; + tensor var_787 = const()[name = string("op_787"), val = tensor([0, 2, 1, 3])]; + tensor concat_6 = const()[name = string("concat_6"), val = tensor([1, 1500, 768])]; + tensor var_788_cast_fp16 = transpose(perm = var_787, x = var_786_cast_fp16)[name = string("transpose_92")]; + tensor x_83_cast_fp16 = reshape(shape = concat_6, x = var_788_cast_fp16)[name = string("x_83_cast_fp16")]; + tensor var_792_to_fp16 = const()[name = string("op_792_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94813184)))]; + tensor var_793_to_fp16 = const()[name = string("op_793_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95992896)))]; + tensor linear_39_cast_fp16 = linear(bias = var_793_to_fp16, weight = var_792_to_fp16, x = x_83_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = string("x_85_cast_fp16")]; + tensor var_800_axes_0 = const()[name = string("op_800_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = string("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95994496)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = string("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95996096)))]; + tensor var_800_cast_fp16 = layer_norm(axes = var_800_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = string("op_800_cast_fp16")]; + tensor var_809_to_fp16 = const()[name = string("op_809_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95997696)))]; + tensor var_810_to_fp16 = const()[name = string("op_810_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100716352)))]; + tensor linear_40_cast_fp16 = linear(bias = var_810_to_fp16, weight = var_809_to_fp16, x = var_800_cast_fp16)[name = string("linear_40_cast_fp16")]; + string x_89_mode_0 = const()[name = string("x_89_mode_0"), val = string("EXACT")]; + tensor x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = string("x_89_cast_fp16")]; + tensor var_815_to_fp16 = const()[name = string("op_815_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100722560)))]; + tensor var_816_to_fp16 = const()[name = string("op_816_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105441216)))]; + tensor linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = x_89_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = string("x_91_cast_fp16")]; + int32 var_826 = const()[name = string("op_826"), val = int32(-1)]; + tensor var_842_axes_0 = const()[name = string("op_842_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = string("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105442816)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = string("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105444416)))]; + fp16 var_832_to_fp16 = const()[name = string("op_832_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_842_cast_fp16 = layer_norm(axes = var_842_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = string("op_842_cast_fp16")]; + tensor var_853_to_fp16 = const()[name = string("op_853_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105446016)))]; + tensor var_854_to_fp16 = const()[name = string("op_854_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106625728)))]; + tensor linear_42_cast_fp16 = linear(bias = var_854_to_fp16, weight = var_853_to_fp16, x = var_842_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor var_857_to_fp16 = const()[name = string("op_857_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106627328)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_857_to_fp16, x = var_842_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor var_861_to_fp16 = const()[name = string("op_861_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107807040)))]; + tensor var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108986752)))]; + tensor linear_44_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = var_842_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor var_870 = const()[name = string("op_870"), val = tensor([1, 1500, 12, -1])]; + tensor var_871_cast_fp16 = reshape(shape = var_870, x = linear_42_cast_fp16)[name = string("op_871_cast_fp16")]; + tensor const_98_to_fp16 = const()[name = string("const_98_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_871_cast_fp16, y = const_98_to_fp16)[name = string("q_31_cast_fp16")]; + tensor var_877 = const()[name = string("op_877"), val = tensor([1, 1500, 12, -1])]; + tensor var_878_cast_fp16 = reshape(shape = var_877, x = linear_43_cast_fp16)[name = string("op_878_cast_fp16")]; + tensor const_99_to_fp16 = const()[name = string("const_99_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_31_cast_fp16 = mul(x = var_878_cast_fp16, y = const_99_to_fp16)[name = string("k_31_cast_fp16")]; + tensor var_884 = const()[name = string("op_884"), val = tensor([1, 1500, 12, -1])]; + tensor var_885_cast_fp16 = reshape(shape = var_884, x = linear_44_cast_fp16)[name = string("op_885_cast_fp16")]; + tensor var_886 = const()[name = string("op_886"), val = tensor([0, 2, 1, 3])]; + bool qk_15_transpose_x_0 = const()[name = string("qk_15_transpose_x_0"), val = bool(false)]; + bool qk_15_transpose_y_0 = const()[name = string("qk_15_transpose_y_0"), val = bool(false)]; + tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = k_31_cast_fp16)[name = string("transpose_89")]; + tensor transpose_62 = transpose(perm = transpose_62_perm_0, x = q_31_cast_fp16)[name = string("transpose_90")]; + tensor qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_62, y = transpose_63)[name = string("qk_15_cast_fp16")]; + tensor var_890_cast_fp16 = softmax(axis = var_826, x = qk_15_cast_fp16)[name = string("op_890_cast_fp16")]; + bool var_892_transpose_x_0 = const()[name = string("op_892_transpose_x_0"), val = bool(false)]; + bool var_892_transpose_y_0 = const()[name = string("op_892_transpose_y_0"), val = bool(false)]; + tensor v_31_cast_fp16 = transpose(perm = var_886, x = var_885_cast_fp16)[name = string("transpose_91")]; + tensor var_892_cast_fp16 = matmul(transpose_x = var_892_transpose_x_0, transpose_y = var_892_transpose_y_0, x = var_890_cast_fp16, y = v_31_cast_fp16)[name = string("op_892_cast_fp16")]; + tensor var_893 = const()[name = string("op_893"), val = tensor([0, 2, 1, 3])]; + tensor concat_7 = const()[name = string("concat_7"), val = tensor([1, 1500, 768])]; + tensor var_894_cast_fp16 = transpose(perm = var_893, x = var_892_cast_fp16)[name = string("transpose_88")]; + tensor x_95_cast_fp16 = reshape(shape = concat_7, x = var_894_cast_fp16)[name = string("x_95_cast_fp16")]; + tensor var_898_to_fp16 = const()[name = string("op_898_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108988352)))]; + tensor var_899_to_fp16 = const()[name = string("op_899_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110168064)))]; + tensor linear_45_cast_fp16 = linear(bias = var_899_to_fp16, weight = var_898_to_fp16, x = x_95_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = string("x_97_cast_fp16")]; + tensor var_906_axes_0 = const()[name = string("op_906_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = string("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110169664)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = string("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110171264)))]; + tensor var_906_cast_fp16 = layer_norm(axes = var_906_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = string("op_906_cast_fp16")]; + tensor var_915_to_fp16 = const()[name = string("op_915_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110172864)))]; + tensor var_916_to_fp16 = const()[name = string("op_916_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114891520)))]; + tensor linear_46_cast_fp16 = linear(bias = var_916_to_fp16, weight = var_915_to_fp16, x = var_906_cast_fp16)[name = string("linear_46_cast_fp16")]; + string x_101_mode_0 = const()[name = string("x_101_mode_0"), val = string("EXACT")]; + tensor x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = string("x_101_cast_fp16")]; + tensor var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114897728)))]; + tensor var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119616384)))]; + tensor linear_47_cast_fp16 = linear(bias = var_922_to_fp16, weight = var_921_to_fp16, x = x_101_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = string("x_103_cast_fp16")]; + int32 var_932 = const()[name = string("op_932"), val = int32(-1)]; + tensor var_948_axes_0 = const()[name = string("op_948_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = string("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119617984)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = string("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119619584)))]; + fp16 var_938_to_fp16 = const()[name = string("op_938_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_948_cast_fp16 = layer_norm(axes = var_948_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = string("op_948_cast_fp16")]; + tensor var_959_to_fp16 = const()[name = string("op_959_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119621184)))]; + tensor var_960_to_fp16 = const()[name = string("op_960_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120800896)))]; + tensor linear_48_cast_fp16 = linear(bias = var_960_to_fp16, weight = var_959_to_fp16, x = var_948_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120802496)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_963_to_fp16, x = var_948_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121982208)))]; + tensor var_968_to_fp16 = const()[name = string("op_968_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123161920)))]; + tensor linear_50_cast_fp16 = linear(bias = var_968_to_fp16, weight = var_967_to_fp16, x = var_948_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor var_976 = const()[name = string("op_976"), val = tensor([1, 1500, 12, -1])]; + tensor var_977_cast_fp16 = reshape(shape = var_976, x = linear_48_cast_fp16)[name = string("op_977_cast_fp16")]; + tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = var_977_cast_fp16, y = const_100_to_fp16)[name = string("q_35_cast_fp16")]; + tensor var_983 = const()[name = string("op_983"), val = tensor([1, 1500, 12, -1])]; + tensor var_984_cast_fp16 = reshape(shape = var_983, x = linear_49_cast_fp16)[name = string("op_984_cast_fp16")]; + tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_984_cast_fp16, y = const_101_to_fp16)[name = string("k_35_cast_fp16")]; + tensor var_990 = const()[name = string("op_990"), val = tensor([1, 1500, 12, -1])]; + tensor var_991_cast_fp16 = reshape(shape = var_990, x = linear_50_cast_fp16)[name = string("op_991_cast_fp16")]; + tensor var_992 = const()[name = string("op_992"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = k_35_cast_fp16)[name = string("transpose_85")]; + tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = q_35_cast_fp16)[name = string("transpose_86")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_64, y = transpose_65)[name = string("qk_17_cast_fp16")]; + tensor var_996_cast_fp16 = softmax(axis = var_932, x = qk_17_cast_fp16)[name = string("op_996_cast_fp16")]; + bool var_998_transpose_x_0 = const()[name = string("op_998_transpose_x_0"), val = bool(false)]; + bool var_998_transpose_y_0 = const()[name = string("op_998_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_992, x = var_991_cast_fp16)[name = string("transpose_87")]; + tensor var_998_cast_fp16 = matmul(transpose_x = var_998_transpose_x_0, transpose_y = var_998_transpose_y_0, x = var_996_cast_fp16, y = v_35_cast_fp16)[name = string("op_998_cast_fp16")]; + tensor var_999 = const()[name = string("op_999"), val = tensor([0, 2, 1, 3])]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([1, 1500, 768])]; + tensor var_1000_cast_fp16 = transpose(perm = var_999, x = var_998_cast_fp16)[name = string("transpose_84")]; + tensor x_107_cast_fp16 = reshape(shape = concat_8, x = var_1000_cast_fp16)[name = string("x_107_cast_fp16")]; + tensor var_1004_to_fp16 = const()[name = string("op_1004_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123163520)))]; + tensor var_1005_to_fp16 = const()[name = string("op_1005_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124343232)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1005_to_fp16, weight = var_1004_to_fp16, x = x_107_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = string("x_109_cast_fp16")]; + tensor var_1012_axes_0 = const()[name = string("op_1012_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = string("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124344832)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = string("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124346432)))]; + tensor var_1012_cast_fp16 = layer_norm(axes = var_1012_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = string("op_1012_cast_fp16")]; + tensor var_1021_to_fp16 = const()[name = string("op_1021_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124348032)))]; + tensor var_1022_to_fp16 = const()[name = string("op_1022_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129066688)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1012_cast_fp16)[name = string("linear_52_cast_fp16")]; + string x_113_mode_0 = const()[name = string("x_113_mode_0"), val = string("EXACT")]; + tensor x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = string("x_113_cast_fp16")]; + tensor var_1027_to_fp16 = const()[name = string("op_1027_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129072896)))]; + tensor var_1028_to_fp16 = const()[name = string("op_1028_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133791552)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1028_to_fp16, weight = var_1027_to_fp16, x = x_113_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = string("x_115_cast_fp16")]; + int32 var_1038 = const()[name = string("op_1038"), val = int32(-1)]; + tensor var_1054_axes_0 = const()[name = string("op_1054_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = string("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133793152)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = string("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133794752)))]; + fp16 var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1054_cast_fp16 = layer_norm(axes = var_1054_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = string("op_1054_cast_fp16")]; + tensor var_1065_to_fp16 = const()[name = string("op_1065_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133796352)))]; + tensor var_1066_to_fp16 = const()[name = string("op_1066_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134976064)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1066_to_fp16, weight = var_1065_to_fp16, x = var_1054_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor var_1069_to_fp16 = const()[name = string("op_1069_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134977664)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1069_to_fp16, x = var_1054_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor var_1073_to_fp16 = const()[name = string("op_1073_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136157376)))]; + tensor var_1074_to_fp16 = const()[name = string("op_1074_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137337088)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1074_to_fp16, weight = var_1073_to_fp16, x = var_1054_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor var_1082 = const()[name = string("op_1082"), val = tensor([1, 1500, 12, -1])]; + tensor var_1083_cast_fp16 = reshape(shape = var_1082, x = linear_54_cast_fp16)[name = string("op_1083_cast_fp16")]; + tensor const_102_to_fp16 = const()[name = string("const_102_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1083_cast_fp16, y = const_102_to_fp16)[name = string("q_39_cast_fp16")]; + tensor var_1089 = const()[name = string("op_1089"), val = tensor([1, 1500, 12, -1])]; + tensor var_1090_cast_fp16 = reshape(shape = var_1089, x = linear_55_cast_fp16)[name = string("op_1090_cast_fp16")]; + tensor const_103_to_fp16 = const()[name = string("const_103_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_1090_cast_fp16, y = const_103_to_fp16)[name = string("k_39_cast_fp16")]; + tensor var_1096 = const()[name = string("op_1096"), val = tensor([1, 1500, 12, -1])]; + tensor var_1097_cast_fp16 = reshape(shape = var_1096, x = linear_56_cast_fp16)[name = string("op_1097_cast_fp16")]; + tensor var_1098 = const()[name = string("op_1098"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = k_39_cast_fp16)[name = string("transpose_81")]; + tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = q_39_cast_fp16)[name = string("transpose_82")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_66, y = transpose_67)[name = string("qk_19_cast_fp16")]; + tensor var_1102_cast_fp16 = softmax(axis = var_1038, x = qk_19_cast_fp16)[name = string("op_1102_cast_fp16")]; + bool var_1104_transpose_x_0 = const()[name = string("op_1104_transpose_x_0"), val = bool(false)]; + bool var_1104_transpose_y_0 = const()[name = string("op_1104_transpose_y_0"), val = bool(false)]; + tensor v_39_cast_fp16 = transpose(perm = var_1098, x = var_1097_cast_fp16)[name = string("transpose_83")]; + tensor var_1104_cast_fp16 = matmul(transpose_x = var_1104_transpose_x_0, transpose_y = var_1104_transpose_y_0, x = var_1102_cast_fp16, y = v_39_cast_fp16)[name = string("op_1104_cast_fp16")]; + tensor var_1105 = const()[name = string("op_1105"), val = tensor([0, 2, 1, 3])]; + tensor concat_9 = const()[name = string("concat_9"), val = tensor([1, 1500, 768])]; + tensor var_1106_cast_fp16 = transpose(perm = var_1105, x = var_1104_cast_fp16)[name = string("transpose_80")]; + tensor x_119_cast_fp16 = reshape(shape = concat_9, x = var_1106_cast_fp16)[name = string("x_119_cast_fp16")]; + tensor var_1110_to_fp16 = const()[name = string("op_1110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137338688)))]; + tensor var_1111_to_fp16 = const()[name = string("op_1111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138518400)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1111_to_fp16, weight = var_1110_to_fp16, x = x_119_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = string("x_121_cast_fp16")]; + tensor var_1118_axes_0 = const()[name = string("op_1118_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = string("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138520000)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = string("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138521600)))]; + tensor var_1118_cast_fp16 = layer_norm(axes = var_1118_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = string("op_1118_cast_fp16")]; + tensor var_1127_to_fp16 = const()[name = string("op_1127_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138523200)))]; + tensor var_1128_to_fp16 = const()[name = string("op_1128_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143241856)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1128_to_fp16, weight = var_1127_to_fp16, x = var_1118_cast_fp16)[name = string("linear_58_cast_fp16")]; + string x_125_mode_0 = const()[name = string("x_125_mode_0"), val = string("EXACT")]; + tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = string("x_125_cast_fp16")]; + tensor var_1133_to_fp16 = const()[name = string("op_1133_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143248064)))]; + tensor var_1134_to_fp16 = const()[name = string("op_1134_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147966720)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1134_to_fp16, weight = var_1133_to_fp16, x = x_125_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = string("x_127_cast_fp16")]; + int32 var_1144 = const()[name = string("op_1144"), val = int32(-1)]; + tensor var_1160_axes_0 = const()[name = string("op_1160_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = string("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147968320)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = string("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147969920)))]; + fp16 var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1160_cast_fp16 = layer_norm(axes = var_1160_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = string("op_1160_cast_fp16")]; + tensor var_1171_to_fp16 = const()[name = string("op_1171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147971520)))]; + tensor var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149151232)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1172_to_fp16, weight = var_1171_to_fp16, x = var_1160_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149152832)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1175_to_fp16, x = var_1160_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor var_1179_to_fp16 = const()[name = string("op_1179_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150332544)))]; + tensor var_1180_to_fp16 = const()[name = string("op_1180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151512256)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = var_1160_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor var_1188 = const()[name = string("op_1188"), val = tensor([1, 1500, 12, -1])]; + tensor var_1189_cast_fp16 = reshape(shape = var_1188, x = linear_60_cast_fp16)[name = string("op_1189_cast_fp16")]; + tensor const_104_to_fp16 = const()[name = string("const_104_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1189_cast_fp16, y = const_104_to_fp16)[name = string("q_43_cast_fp16")]; + tensor var_1195 = const()[name = string("op_1195"), val = tensor([1, 1500, 12, -1])]; + tensor var_1196_cast_fp16 = reshape(shape = var_1195, x = linear_61_cast_fp16)[name = string("op_1196_cast_fp16")]; + tensor const_105_to_fp16 = const()[name = string("const_105_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_43_cast_fp16 = mul(x = var_1196_cast_fp16, y = const_105_to_fp16)[name = string("k_43_cast_fp16")]; + tensor var_1202 = const()[name = string("op_1202"), val = tensor([1, 1500, 12, -1])]; + tensor var_1203_cast_fp16 = reshape(shape = var_1202, x = linear_62_cast_fp16)[name = string("op_1203_cast_fp16")]; + tensor var_1204 = const()[name = string("op_1204"), val = tensor([0, 2, 1, 3])]; + bool qk_21_transpose_x_0 = const()[name = string("qk_21_transpose_x_0"), val = bool(false)]; + bool qk_21_transpose_y_0 = const()[name = string("qk_21_transpose_y_0"), val = bool(false)]; + tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = k_43_cast_fp16)[name = string("transpose_77")]; + tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = q_43_cast_fp16)[name = string("transpose_78")]; + tensor qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_68, y = transpose_69)[name = string("qk_21_cast_fp16")]; + tensor var_1208_cast_fp16 = softmax(axis = var_1144, x = qk_21_cast_fp16)[name = string("op_1208_cast_fp16")]; + bool var_1210_transpose_x_0 = const()[name = string("op_1210_transpose_x_0"), val = bool(false)]; + bool var_1210_transpose_y_0 = const()[name = string("op_1210_transpose_y_0"), val = bool(false)]; + tensor v_43_cast_fp16 = transpose(perm = var_1204, x = var_1203_cast_fp16)[name = string("transpose_79")]; + tensor var_1210_cast_fp16 = matmul(transpose_x = var_1210_transpose_x_0, transpose_y = var_1210_transpose_y_0, x = var_1208_cast_fp16, y = v_43_cast_fp16)[name = string("op_1210_cast_fp16")]; + tensor var_1211 = const()[name = string("op_1211"), val = tensor([0, 2, 1, 3])]; + tensor concat_10 = const()[name = string("concat_10"), val = tensor([1, 1500, 768])]; + tensor var_1212_cast_fp16 = transpose(perm = var_1211, x = var_1210_cast_fp16)[name = string("transpose_76")]; + tensor x_131_cast_fp16 = reshape(shape = concat_10, x = var_1212_cast_fp16)[name = string("x_131_cast_fp16")]; + tensor var_1216_to_fp16 = const()[name = string("op_1216_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151513856)))]; + tensor var_1217_to_fp16 = const()[name = string("op_1217_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152693568)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1217_to_fp16, weight = var_1216_to_fp16, x = x_131_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = string("x_133_cast_fp16")]; + tensor var_1224_axes_0 = const()[name = string("op_1224_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = string("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152695168)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = string("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152696768)))]; + tensor var_1224_cast_fp16 = layer_norm(axes = var_1224_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = string("op_1224_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = string("op_1233_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152698368)))]; + tensor var_1234_to_fp16 = const()[name = string("op_1234_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157417024)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1234_to_fp16, weight = var_1233_to_fp16, x = var_1224_cast_fp16)[name = string("linear_64_cast_fp16")]; + string x_137_mode_0 = const()[name = string("x_137_mode_0"), val = string("EXACT")]; + tensor x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = string("x_137_cast_fp16")]; + tensor var_1239_to_fp16 = const()[name = string("op_1239_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157423232)))]; + tensor var_1240_to_fp16 = const()[name = string("op_1240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162141888)))]; + tensor linear_65_cast_fp16 = linear(bias = var_1240_to_fp16, weight = var_1239_to_fp16, x = x_137_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = string("x_139_cast_fp16")]; + int32 var_1250 = const()[name = string("op_1250"), val = int32(-1)]; + tensor var_1266_axes_0 = const()[name = string("op_1266_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = string("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162143488)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = string("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162145088)))]; + fp16 var_1256_to_fp16 = const()[name = string("op_1256_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1266_cast_fp16 = layer_norm(axes = var_1266_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = string("op_1266_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = string("op_1277_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162146688)))]; + tensor var_1278_to_fp16 = const()[name = string("op_1278_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163326400)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1278_to_fp16, weight = var_1277_to_fp16, x = var_1266_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = string("op_1281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163328000)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1281_to_fp16, x = var_1266_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = string("op_1285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164507712)))]; + tensor var_1286_to_fp16 = const()[name = string("op_1286_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165687424)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1286_to_fp16, weight = var_1285_to_fp16, x = var_1266_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor var_1294 = const()[name = string("op_1294"), val = tensor([1, 1500, 12, -1])]; + tensor var_1295_cast_fp16 = reshape(shape = var_1294, x = linear_66_cast_fp16)[name = string("op_1295_cast_fp16")]; + tensor const_106_to_fp16 = const()[name = string("const_106_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_1295_cast_fp16, y = const_106_to_fp16)[name = string("q_cast_fp16")]; + tensor var_1301 = const()[name = string("op_1301"), val = tensor([1, 1500, 12, -1])]; + tensor var_1302_cast_fp16 = reshape(shape = var_1301, x = linear_67_cast_fp16)[name = string("op_1302_cast_fp16")]; + tensor const_107_to_fp16 = const()[name = string("const_107_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_1302_cast_fp16, y = const_107_to_fp16)[name = string("k_cast_fp16")]; + tensor var_1308 = const()[name = string("op_1308"), val = tensor([1, 1500, 12, -1])]; + tensor var_1309_cast_fp16 = reshape(shape = var_1308, x = linear_68_cast_fp16)[name = string("op_1309_cast_fp16")]; + tensor var_1310 = const()[name = string("op_1310"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_70_perm_0 = const()[name = string("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = string("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = k_cast_fp16)[name = string("transpose_73")]; + tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = q_cast_fp16)[name = string("transpose_74")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_70, y = transpose_71)[name = string("qk_cast_fp16")]; + tensor var_1314_cast_fp16 = softmax(axis = var_1250, x = qk_cast_fp16)[name = string("op_1314_cast_fp16")]; + bool var_1316_transpose_x_0 = const()[name = string("op_1316_transpose_x_0"), val = bool(false)]; + bool var_1316_transpose_y_0 = const()[name = string("op_1316_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_1310, x = var_1309_cast_fp16)[name = string("transpose_75")]; + tensor var_1316_cast_fp16 = matmul(transpose_x = var_1316_transpose_x_0, transpose_y = var_1316_transpose_y_0, x = var_1314_cast_fp16, y = v_cast_fp16)[name = string("op_1316_cast_fp16")]; + tensor var_1317 = const()[name = string("op_1317"), val = tensor([0, 2, 1, 3])]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 1500, 768])]; + tensor var_1318_cast_fp16 = transpose(perm = var_1317, x = var_1316_cast_fp16)[name = string("transpose_72")]; + tensor x_143_cast_fp16 = reshape(shape = concat_11, x = var_1318_cast_fp16)[name = string("x_143_cast_fp16")]; + tensor var_1322_to_fp16 = const()[name = string("op_1322_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165689024)))]; + tensor var_1323_to_fp16 = const()[name = string("op_1323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166868736)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1323_to_fp16, weight = var_1322_to_fp16, x = x_143_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = string("x_145_cast_fp16")]; + tensor var_1330_axes_0 = const()[name = string("op_1330_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = string("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166870336)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = string("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166871936)))]; + tensor var_1330_cast_fp16 = layer_norm(axes = var_1330_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = string("op_1330_cast_fp16")]; + tensor var_1339_to_fp16 = const()[name = string("op_1339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166873536)))]; + tensor var_1340_to_fp16 = const()[name = string("op_1340_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171592192)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1340_to_fp16, weight = var_1339_to_fp16, x = var_1330_cast_fp16)[name = string("linear_70_cast_fp16")]; + string x_149_mode_0 = const()[name = string("x_149_mode_0"), val = string("EXACT")]; + tensor x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = string("x_149_cast_fp16")]; + tensor var_1345_to_fp16 = const()[name = string("op_1345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171598400)))]; + tensor var_1346_to_fp16 = const()[name = string("op_1346_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176317056)))]; + tensor linear_71_cast_fp16 = linear(bias = var_1346_to_fp16, weight = var_1345_to_fp16, x = x_149_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_1359_axes_0 = const()[name = string("op_1359_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176318656)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176320256)))]; + fp16 var_1350_to_fp16 = const()[name = string("op_1350_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_1359_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_1350_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_1359_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/small/encoder.mlmodelc/weights/weight.bin b/small/encoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..af87c7443a8aec92126d4359f862e98ecdcce744 --- /dev/null +++ b/small/encoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3ab676977d57b06993ee7ebc638fc8568a99ddb11cb7a445328ce50fbd8b36 +size 176321856 diff --git a/small/model_dims.json b/small/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c6f3bbc78ba2e4b17edea0fe4b151ef7a091a5 --- /dev/null +++ b/small/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 80, + "n_audio_ctx": 1500, + "n_audio_state": 768, + "n_audio_head": 12, + "n_audio_layer": 12, + "n_vocab": 51865, + "n_text_ctx": 448, + "n_text_state": 768, + "n_text_head": 12, + "n_text_layer": 12 +} \ No newline at end of file diff --git a/tiny/decoder_first.mlmodelc/analytics/coremldata.bin b/tiny/decoder_first.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9e0451492e27debd13b9046778f57690e53b2c0 --- /dev/null +++ b/tiny/decoder_first.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbd82796122288fc8db28d43a2b33ea5d8e40f8dfe5f67bb51810d9e15cfd9a +size 243 diff --git a/tiny/decoder_first.mlmodelc/coremldata.bin b/tiny/decoder_first.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..764b6b14959580bbb96df3603635a8995b081f27 --- /dev/null +++ b/tiny/decoder_first.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de85178dbee1768815281dad14b531ee364fb91c8ea89d70889ca8aabae34d70 +size 453 diff --git a/tiny/decoder_first.mlmodelc/metadata.json b/tiny/decoder_first.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..6cf5bdcc3ff0beb4bc8eea0d52f4153be48b1d9e --- /dev/null +++ b/tiny/decoder_first.mlmodelc/metadata.json @@ -0,0 +1,106 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "dummy", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.writeState" : 10, + "Shape" : 8, + "Ios18.linear" : 8, + "Identity" : 1, + "Ios18.gather" : 8, + "Ios18.concat" : 8, + "Ios18.sliceUpdate" : 10, + "Ios18.cast" : 16, + "Ios18.expandDims" : 8, + "Ios18.readState" : 10 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 448 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 448, 384]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 448 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 448, 384]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 1500 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 1500, 384]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 1500 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 1500, 384]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...1500 × 384", + "shapeRange" : "[[1, 1], [1, 1500], [384, 384]]", + "formattedType" : "MultiArray (Float16 1 × 1 × 384)", + "type" : "MultiArray", + "shape" : "[1, 1, 384]", + "name" : "audio_data", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_first", + "method" : "predict" + } +] \ No newline at end of file diff --git a/tiny/decoder_first.mlmodelc/model.mil b/tiny/decoder_first.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..9706e505fecf468b98b87c5c9b8e2e88c31f32db --- /dev/null +++ b/tiny/decoder_first.mlmodelc/model.mil @@ -0,0 +1,255 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor audio_data, state> k_cache1, state> k_cache2, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"audio_data", [1, 1, 384]}}), ("RangeDims", {{"audio_data", [[1, 1], [1, 1500], [384, 384]]}})))] { + tensor dummy = identity(x = audio_data)[name = string("identity_0")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([0, 0, 0, 0])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([0, 0, 0, 0])]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_0, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_1, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_10_write_state")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([0, 0, 0, 0])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([0, 0, 0, 0])]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = const_0_to_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_11_write_state")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor var_75_to_fp16 = const()[name = string("op_75_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1376384)))]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1671360)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_75_to_fp16, x = audio_data)[name = string("linear_0_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = string("op_79_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1672192)))]; + tensor var_80_to_fp16 = const()[name = string("op_80_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1967168)))]; + tensor linear_1_cast_fp16 = linear(bias = var_80_to_fp16, weight = var_79_to_fp16, x = audio_data)[name = string("linear_1_cast_fp16")]; + tensor var_82_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_82_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_82_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_82_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_82_shape_cast_fp16_to_int16 = cast(dtype = var_82_shape_cast_fp16_to_int16_dtype_0, x = var_82_shape_cast_fp16)[name = string("cast_31")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_82_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_11_axes_0 = const()[name = string("expand_dims_11_axes_0"), val = tensor([0])]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_30")]; + tensor expand_dims_11 = expand_dims(axes = expand_dims_11_axes_0, x = gather_0_cast_uint16_to_int32)[name = string("expand_dims_11")]; + tensor concat_5 = const()[name = string("concat_5"), val = tensor([0, 0, 0, 0])]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values3_0 = const()[name = string("concat_6_values3_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_11, concat_6_values3_0))[name = string("concat_6")]; + tensor k_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = k_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = k_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_1_stride_0, update = linear_0_cast_fp16, x = read_state_2)[name = string("k_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_1_cast_fp16, input = k_cache2)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = k_cache2)[name = string("coreml_update_state_12")]; + tensor var_87_shape_cast_fp16 = shape(x = linear_1_cast_fp16)[name = string("op_87_shape_cast_fp16")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_87_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_87_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_87_shape_cast_fp16_to_uint16 = cast(dtype = var_87_shape_cast_fp16_to_uint16_dtype_0, x = var_87_shape_cast_fp16)[name = string("cast_29")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_87_shape_cast_fp16_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_15_axes_0 = const()[name = string("expand_dims_15_axes_0"), val = tensor([0])]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_28")]; + tensor expand_dims_15 = expand_dims(axes = expand_dims_15_axes_0, x = gather_1_cast_uint16_to_int32)[name = string("expand_dims_15")]; + tensor concat_8 = const()[name = string("concat_8"), val = tensor([0, 0, 0, 0])]; + tensor concat_9_values0_0 = const()[name = string("concat_9_values0_0"), val = tensor([0])]; + tensor concat_9_values1_0 = const()[name = string("concat_9_values1_0"), val = tensor([0])]; + tensor concat_9_values3_0 = const()[name = string("concat_9_values3_0"), val = tensor([0])]; + int32 concat_9_axis_0 = const()[name = string("concat_9_axis_0"), val = int32(0)]; + bool concat_9_interleave_0 = const()[name = string("concat_9_interleave_0"), val = bool(false)]; + tensor concat_9 = concat(axis = concat_9_axis_0, interleave = concat_9_interleave_0, values = (concat_9_values0_0, concat_9_values1_0, expand_dims_15, concat_9_values3_0))[name = string("concat_9")]; + tensor v_cache2_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_8, begin_mask = v_cache2_internal_tensor_assign_1_begin_mask_0, end = concat_9, end_mask = v_cache2_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_3)[name = string("v_cache2_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_1_cast_fp16, input = v_cache2)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = v_cache2)[name = string("coreml_update_state_13")]; + tensor var_109_to_fp16 = const()[name = string("op_109_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1968000)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_109_to_fp16, x = audio_data)[name = string("linear_2_cast_fp16")]; + tensor var_113_to_fp16 = const()[name = string("op_113_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2262976)))]; + tensor var_114_to_fp16 = const()[name = string("op_114_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2557952)))]; + tensor linear_3_cast_fp16 = linear(bias = var_114_to_fp16, weight = var_113_to_fp16, x = audio_data)[name = string("linear_3_cast_fp16")]; + tensor var_116_shape_cast_fp16 = shape(x = linear_2_cast_fp16)[name = string("op_116_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_116_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_116_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_116_shape_cast_fp16_to_uint16 = cast(dtype = var_116_shape_cast_fp16_to_uint16_dtype_0, x = var_116_shape_cast_fp16)[name = string("cast_27")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_116_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_26")]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = gather_2_cast_uint16_to_int32)[name = string("expand_dims_19")]; + tensor concat_11 = const()[name = string("concat_11"), val = tensor([1, 0, 0, 0])]; + tensor concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = tensor([0])]; + tensor concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = tensor([0])]; + tensor concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = tensor([0])]; + int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; + bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; + tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, expand_dims_19, concat_12_values3_0))[name = string("concat_12")]; + tensor k_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_11, begin_mask = k_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_12, end_mask = k_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_2_stride_0, update = linear_2_cast_fp16, x = coreml_update_state_12)[name = string("k_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_2_cast_fp16, input = k_cache2)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = k_cache2)[name = string("coreml_update_state_14")]; + tensor var_121_shape_cast_fp16 = shape(x = linear_3_cast_fp16)[name = string("op_121_shape_cast_fp16")]; + int32 gather_3_axis_0 = const()[name = string("gather_3_axis_0"), val = int32(0)]; + int32 gather_3_batch_dims_0 = const()[name = string("gather_3_batch_dims_0"), val = int32(0)]; + bool gather_3_validate_indices_0 = const()[name = string("gather_3_validate_indices_0"), val = bool(false)]; + string var_121_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_121_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_3_to_uint16 = const()[name = string("select_3_to_uint16"), val = uint16(1)]; + tensor var_121_shape_cast_fp16_to_uint16 = cast(dtype = var_121_shape_cast_fp16_to_uint16_dtype_0, x = var_121_shape_cast_fp16)[name = string("cast_25")]; + uint16 gather_3_cast_uint16 = gather(axis = gather_3_axis_0, batch_dims = gather_3_batch_dims_0, indices = select_3_to_uint16, validate_indices = gather_3_validate_indices_0, x = var_121_shape_cast_fp16_to_uint16)[name = string("gather_3_cast_uint16")]; + string gather_3_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_3_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_23_axes_0 = const()[name = string("expand_dims_23_axes_0"), val = tensor([0])]; + int32 gather_3_cast_uint16_to_int32 = cast(dtype = gather_3_cast_uint16_to_int32_dtype_0, x = gather_3_cast_uint16)[name = string("cast_24")]; + tensor expand_dims_23 = expand_dims(axes = expand_dims_23_axes_0, x = gather_3_cast_uint16_to_int32)[name = string("expand_dims_23")]; + tensor concat_14 = const()[name = string("concat_14"), val = tensor([1, 0, 0, 0])]; + tensor concat_15_values0_0 = const()[name = string("concat_15_values0_0"), val = tensor([0])]; + tensor concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor([0])]; + tensor concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor([0])]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (concat_15_values0_0, concat_15_values1_0, expand_dims_23, concat_15_values3_0))[name = string("concat_15")]; + tensor v_cache2_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_14, begin_mask = v_cache2_internal_tensor_assign_2_begin_mask_0, end = concat_15, end_mask = v_cache2_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_2_stride_0, update = linear_3_cast_fp16, x = coreml_update_state_13)[name = string("v_cache2_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_2_cast_fp16, input = v_cache2)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = v_cache2)[name = string("coreml_update_state_15")]; + tensor var_143_to_fp16 = const()[name = string("op_143_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2558784)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_143_to_fp16, x = audio_data)[name = string("linear_4_cast_fp16")]; + tensor var_147_to_fp16 = const()[name = string("op_147_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2853760)))]; + tensor var_148_to_fp16 = const()[name = string("op_148_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3148736)))]; + tensor linear_5_cast_fp16 = linear(bias = var_148_to_fp16, weight = var_147_to_fp16, x = audio_data)[name = string("linear_5_cast_fp16")]; + tensor var_150_shape_cast_fp16 = shape(x = linear_4_cast_fp16)[name = string("op_150_shape_cast_fp16")]; + int32 gather_4_axis_0 = const()[name = string("gather_4_axis_0"), val = int32(0)]; + int32 gather_4_batch_dims_0 = const()[name = string("gather_4_batch_dims_0"), val = int32(0)]; + bool gather_4_validate_indices_0 = const()[name = string("gather_4_validate_indices_0"), val = bool(false)]; + string var_150_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_150_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_4_to_uint16 = const()[name = string("select_4_to_uint16"), val = uint16(1)]; + tensor var_150_shape_cast_fp16_to_uint16 = cast(dtype = var_150_shape_cast_fp16_to_uint16_dtype_0, x = var_150_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_150_shape_cast_fp16_to_uint16)[name = string("gather_4_cast_uint16")]; + string gather_4_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_4_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_27_axes_0 = const()[name = string("expand_dims_27_axes_0"), val = tensor([0])]; + int32 gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = string("cast_22")]; + tensor expand_dims_27 = expand_dims(axes = expand_dims_27_axes_0, x = gather_4_cast_uint16_to_int32)[name = string("expand_dims_27")]; + tensor concat_17 = const()[name = string("concat_17"), val = tensor([2, 0, 0, 0])]; + tensor concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = tensor([0])]; + tensor concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = tensor([0])]; + tensor concat_18_values3_0 = const()[name = string("concat_18_values3_0"), val = tensor([0])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, expand_dims_27, concat_18_values3_0))[name = string("concat_18")]; + tensor k_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_17, begin_mask = k_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_18, end_mask = k_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_3_stride_0, update = linear_4_cast_fp16, x = coreml_update_state_14)[name = string("k_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_3_cast_fp16, input = k_cache2)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = k_cache2)[name = string("coreml_update_state_16")]; + tensor var_155_shape_cast_fp16 = shape(x = linear_5_cast_fp16)[name = string("op_155_shape_cast_fp16")]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_155_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_155_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_155_shape_cast_fp16_to_uint16 = cast(dtype = var_155_shape_cast_fp16_to_uint16_dtype_0, x = var_155_shape_cast_fp16)[name = string("cast_21")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_155_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_31_axes_0 = const()[name = string("expand_dims_31_axes_0"), val = tensor([0])]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_20")]; + tensor expand_dims_31 = expand_dims(axes = expand_dims_31_axes_0, x = gather_5_cast_uint16_to_int32)[name = string("expand_dims_31")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([2, 0, 0, 0])]; + tensor concat_21_values0_0 = const()[name = string("concat_21_values0_0"), val = tensor([0])]; + tensor concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor([0])]; + tensor concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor([0])]; + int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)]; + bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)]; + tensor concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (concat_21_values0_0, concat_21_values1_0, expand_dims_31, concat_21_values3_0))[name = string("concat_21")]; + tensor v_cache2_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_cache2_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = v_cache2_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_3_stride_0, update = linear_5_cast_fp16, x = coreml_update_state_15)[name = string("v_cache2_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_3_cast_fp16, input = v_cache2)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = v_cache2)[name = string("coreml_update_state_17")]; + tensor var_177_to_fp16 = const()[name = string("op_177_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3149568)))]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = var_177_to_fp16, x = audio_data)[name = string("linear_6_cast_fp16")]; + tensor var_181_to_fp16 = const()[name = string("op_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3444544)))]; + tensor var_182_to_fp16 = const()[name = string("op_182_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3739520)))]; + tensor linear_7_cast_fp16 = linear(bias = var_182_to_fp16, weight = var_181_to_fp16, x = audio_data)[name = string("linear_7_cast_fp16")]; + tensor var_184_shape_cast_fp16 = shape(x = linear_6_cast_fp16)[name = string("op_184_shape_cast_fp16")]; + int32 gather_6_axis_0 = const()[name = string("gather_6_axis_0"), val = int32(0)]; + int32 gather_6_batch_dims_0 = const()[name = string("gather_6_batch_dims_0"), val = int32(0)]; + bool gather_6_validate_indices_0 = const()[name = string("gather_6_validate_indices_0"), val = bool(false)]; + string var_184_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_184_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_6_to_uint16 = const()[name = string("select_6_to_uint16"), val = uint16(1)]; + tensor var_184_shape_cast_fp16_to_uint16 = cast(dtype = var_184_shape_cast_fp16_to_uint16_dtype_0, x = var_184_shape_cast_fp16)[name = string("cast_19")]; + uint16 gather_6_cast_uint16 = gather(axis = gather_6_axis_0, batch_dims = gather_6_batch_dims_0, indices = select_6_to_uint16, validate_indices = gather_6_validate_indices_0, x = var_184_shape_cast_fp16_to_uint16)[name = string("gather_6_cast_uint16")]; + string gather_6_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_6_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + int32 gather_6_cast_uint16_to_int32 = cast(dtype = gather_6_cast_uint16_to_int32_dtype_0, x = gather_6_cast_uint16)[name = string("cast_18")]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = gather_6_cast_uint16_to_int32)[name = string("expand_dims_35")]; + tensor concat_23 = const()[name = string("concat_23"), val = tensor([3, 0, 0, 0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([0])]; + tensor concat_24_values1_0 = const()[name = string("concat_24_values1_0"), val = tensor([0])]; + tensor concat_24_values3_0 = const()[name = string("concat_24_values3_0"), val = tensor([0])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, concat_24_values1_0, expand_dims_35, concat_24_values3_0))[name = string("concat_24")]; + tensor k_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_23, begin_mask = k_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_24, end_mask = k_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache2_internal_tensor_assign_4_stride_0, update = linear_6_cast_fp16, x = coreml_update_state_16)[name = string("k_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache2_internal_tensor_assign_4_cast_fp16, input = k_cache2)[name = string("coreml_update_state_18_write_state")]; + tensor var_189_shape_cast_fp16 = shape(x = linear_7_cast_fp16)[name = string("op_189_shape_cast_fp16")]; + int32 gather_7_axis_0 = const()[name = string("gather_7_axis_0"), val = int32(0)]; + int32 gather_7_batch_dims_0 = const()[name = string("gather_7_batch_dims_0"), val = int32(0)]; + bool gather_7_validate_indices_0 = const()[name = string("gather_7_validate_indices_0"), val = bool(false)]; + string var_189_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_189_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_7_to_uint16 = const()[name = string("select_7_to_uint16"), val = uint16(1)]; + tensor var_189_shape_cast_fp16_to_uint16 = cast(dtype = var_189_shape_cast_fp16_to_uint16_dtype_0, x = var_189_shape_cast_fp16)[name = string("cast_17")]; + uint16 gather_7_cast_uint16 = gather(axis = gather_7_axis_0, batch_dims = gather_7_batch_dims_0, indices = select_7_to_uint16, validate_indices = gather_7_validate_indices_0, x = var_189_shape_cast_fp16_to_uint16)[name = string("gather_7_cast_uint16")]; + string gather_7_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_7_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor expand_dims_39_axes_0 = const()[name = string("expand_dims_39_axes_0"), val = tensor([0])]; + int32 gather_7_cast_uint16_to_int32 = cast(dtype = gather_7_cast_uint16_to_int32_dtype_0, x = gather_7_cast_uint16)[name = string("cast_16")]; + tensor expand_dims_39 = expand_dims(axes = expand_dims_39_axes_0, x = gather_7_cast_uint16_to_int32)[name = string("expand_dims_39")]; + tensor concat_26 = const()[name = string("concat_26"), val = tensor([3, 0, 0, 0])]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_39, concat_27_values3_0))[name = string("concat_27")]; + tensor v_cache2_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache2_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache2_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache2_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache2_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache2_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache2_internal_tensor_assign_4_begin_mask_0, end = concat_27, end_mask = v_cache2_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache2_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache2_internal_tensor_assign_4_stride_0, update = linear_7_cast_fp16, x = coreml_update_state_17)[name = string("v_cache2_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache2_internal_tensor_assign_4_cast_fp16, input = v_cache2)[name = string("coreml_update_state_19_write_state")]; + } -> (dummy); +} \ No newline at end of file diff --git a/tiny/decoder_first.mlmodelc/weights/weight.bin b/tiny/decoder_first.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..de64c07fd917e8302af1c8586935350e5cc83994 --- /dev/null +++ b/tiny/decoder_first.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0192b9200433d5cec5b0cdc705d745617174b5b3c1d833a84ae5cd3e07492f96 +size 3740352 diff --git a/tiny/decoder_second.mlmodelc/analytics/coremldata.bin b/tiny/decoder_second.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..5414acfe5c5e402939504d6124b12ef69958c316 --- /dev/null +++ b/tiny/decoder_second.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661a8ad472d4f0b68e3a47c4b7a594f40b4b67df525bed3fd6d7de55823f521d +size 243 diff --git a/tiny/decoder_second.mlmodelc/coremldata.bin b/tiny/decoder_second.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0c579a261da3e7a026491f00e6cd78364fc0653 --- /dev/null +++ b/tiny/decoder_second.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8da6d1eec33635f6e3d6e0c32263ce9b070fc43e0b5a19355951c654eb3e66 +size 487 diff --git a/tiny/decoder_second.mlmodelc/metadata.json b/tiny/decoder_second.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3637eae4f3db4041e6f4e232265ff46d19e51b6d --- /dev/null +++ b/tiny/decoder_second.mlmodelc/metadata.json @@ -0,0 +1,127 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "logits", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.linear" : 33, + "Ios18.readState" : 10, + "Ios18.expandDims" : 5, + "Ios18.sub" : 1, + "Ios18.matmul" : 16, + "Ios18.gelu" : 4, + "Ios18.gather" : 7, + "Ios18.concat" : 22, + "Shape" : 6, + "Ios18.add" : 21, + "Ios18.sliceUpdate" : 16, + "Ios18.sliceByIndex" : 33, + "Ios18.layerNorm" : 13, + "Ios18.cast" : 12, + "Ios18.transpose" : 32, + "Ios18.writeState" : 8, + "Ios18.reshape" : 32, + "Ios18.softmax" : 8, + "Ios18.mul" : 16 + }, + "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)", + "isUpdatable" : "0", + "stateSchema" : [ + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 448 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 448, 384]", + "name" : "k_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 448 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 448, 384]", + "name" : "v_cache1", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 1500 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 1500, 384]", + "name" : "k_cache2", + "type" : "State" + }, + { + "dataType" : "Float16", + "isOptional" : "0", + "formattedType" : "State (Float16 4 × 1 × 1500 × 384)", + "shortDescription" : "", + "shape" : "[4, 1, 1500, 384]", + "name" : "v_cache2", + "type" : "State" + } + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.4.1", + "com.github.apple.coremltools.version" : "8.0" + }, + "inputSchema" : [ + { + "dataType" : "Int32", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Int32 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "token_data", + "shortDescription" : "" + }, + { + "dataType" : "Float16", + "hasShapeFlexibility" : "1", + "isOptional" : "0", + "shapeFlexibility" : "1 × 1...448", + "shapeRange" : "[[1, 1], [1, 448]]", + "formattedType" : "MultiArray (Float16 1 × 1)", + "type" : "MultiArray", + "shape" : "[1, 1]", + "name" : "offset_mask", + "shortDescription" : "" + } + ], + "generatedClassName" : "decoder_second", + "method" : "predict" + } +] \ No newline at end of file diff --git a/tiny/decoder_second.mlmodelc/model.mil b/tiny/decoder_second.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..516a5a7a25dc63e55d651108a0d5f6aaa944c3af --- /dev/null +++ b/tiny/decoder_second.mlmodelc/model.mil @@ -0,0 +1,838 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(state> k_cache1, state> k_cache2, tensor offset_mask, tensor token_data, state> v_cache1, state> v_cache2) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] { + tensor var_22_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_22_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_22_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_22_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)]; + tensor var_22_shape_cast_fp16_to_int16 = cast(dtype = var_22_shape_cast_fp16_to_int16_dtype_0, x = var_22_shape_cast_fp16)[name = string("cast_58")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_22_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_26_shape = shape(x = token_data)[name = string("op_26_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_26_shape_to_uint16_dtype_0 = const()[name = string("op_26_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_26_shape_to_uint16 = cast(dtype = var_26_shape_to_uint16_dtype_0, x = var_26_shape)[name = string("cast_56")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_26_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_55")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_57")]; + int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")]; + int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)]; + int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)]; + bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)]; + tensor token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = token_data, validate_indices = var_42_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_42_cast_fp16")]; + int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)]; + int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)]; + bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)]; + tensor concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")]; + int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(384)]; + int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)]; + bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)]; + tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")]; + tensor var_45_end_mask_0 = const()[name = string("op_45_end_mask_0"), val = tensor([false, true])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39832448)))]; + tensor var_45_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_45_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_45_cast_fp16")]; + tensor x_3_cast_fp16 = add(x = var_42_cast_fp16, y = var_45_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")]; + tensor k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor([1, 1, 448, 384])]; + tensor k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")]; + tensor read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")]; + tensor v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor([1, 1, 448, 384])]; + tensor v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")]; + tensor read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")]; + tensor k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor([1, 1, 1500, 384])]; + tensor k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")]; + tensor read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")]; + tensor v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor([1, 1, 1500, 384])]; + tensor v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")]; + int32 var_67 = const()[name = string("op_67"), val = int32(-1)]; + tensor var_85_axes_0 = const()[name = string("op_85_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40176576)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40177408)))]; + fp16 var_73_to_fp16 = const()[name = string("op_73_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_85_cast_fp16 = layer_norm(axes = var_85_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_85_cast_fp16")]; + tensor var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40178240)))]; + tensor var_97_to_fp16 = const()[name = string("op_97_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40473216)))]; + tensor linear_0_cast_fp16 = linear(bias = var_97_to_fp16, weight = var_96_to_fp16, x = var_85_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_100_to_fp16 = const()[name = string("op_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40474048)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769024)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_100_to_fp16, x = var_85_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769856)))]; + tensor var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41064832)))]; + tensor linear_2_cast_fp16 = linear(bias = var_105_to_fp16, weight = var_104_to_fp16, x = var_85_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_107_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_107_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_107_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_107_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_107_shape_cast_fp16_to_uint16 = cast(dtype = var_107_shape_cast_fp16_to_uint16_dtype_0, x = var_107_shape_cast_fp16)[name = string("cast_54")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_107_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_53")]; + int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor([0])]; + tensor expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor([0])]; + tensor expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor([0])]; + tensor expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")]; + tensor concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor([0])]; + int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)]; + bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)]; + tensor concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + tensor concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor([0])]; + tensor concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")]; + tensor k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_8 = read_state(input = k_cache1)[name = string("coreml_update_state_8")]; + tensor v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_9 = read_state(input = v_cache1)[name = string("coreml_update_state_9")]; + int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)]; + int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(384)]; + int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)]; + bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)]; + tensor concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")]; + tensor var_123_begin_0 = const()[name = string("op_123_begin_0"), val = tensor([0, 0, 0])]; + tensor var_123_end_mask_0 = const()[name = string("op_123_end_mask_0"), val = tensor([true, false, true])]; + tensor var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = concat_10, end_mask = var_123_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_123_cast_fp16")]; + tensor var_126_begin_0 = const()[name = string("op_126_begin_0"), val = tensor([0, 0, 0])]; + tensor var_126_end_mask_0 = const()[name = string("op_126_end_mask_0"), val = tensor([true, false, true])]; + tensor var_126_cast_fp16 = slice_by_index(begin = var_126_begin_0, end = concat_10, end_mask = var_126_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_126_cast_fp16")]; + tensor concat_12x = const()[name = string("concat_12x"), val = tensor([1, -1, 6, 64])]; + tensor var_136_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_136_cast_fp16")]; + tensor const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_20_to_fp16)[name = string("q_3_cast_fp16")]; + tensor concat_13x = const()[name = string("concat_13x"), val = tensor([1, -1, 6, 64])]; + tensor var_143_cast_fp16 = reshape(shape = concat_13x, x = var_123_cast_fp16)[name = string("op_143_cast_fp16")]; + tensor const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_5_cast_fp16 = mul(x = var_143_cast_fp16, y = const_21_to_fp16)[name = string("k_5_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, -1, 6, 64])]; + tensor var_150_cast_fp16 = reshape(shape = concat_14x, x = var_126_cast_fp16)[name = string("op_150_cast_fp16")]; + tensor var_151 = const()[name = string("op_151"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_34 = transpose(perm = transpose_34_perm_0, x = k_5_cast_fp16)[name = string("transpose_78")]; + tensor transpose_33 = transpose(perm = transpose_33_perm_0, x = q_3_cast_fp16)[name = string("transpose_79")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_33, y = transpose_34)[name = string("qk_1_cast_fp16")]; + int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")]; + tensor var_154_begin_0 = const()[name = string("op_154_begin_0"), val = tensor([0, 0])]; + tensor var_154_end_mask_0 = const()[name = string("op_154_end_mask_0"), val = tensor([false, true])]; + tensor mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41065664)))]; + tensor var_154_cast_fp16 = slice_by_index(begin = var_154_begin_0, end = concat_15, end_mask = var_154_end_mask_0, x = mask_to_fp16)[name = string("op_154_cast_fp16")]; + int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)]; + int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; + bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; + tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")]; + tensor var_155_begin_0 = const()[name = string("op_155_begin_0"), val = tensor([0, 0])]; + tensor var_155_end_mask_0 = const()[name = string("op_155_end_mask_0"), val = tensor([true, false])]; + tensor var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = concat_16, end_mask = var_155_end_mask_0, x = var_154_cast_fp16)[name = string("op_155_cast_fp16")]; + tensor qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_155_cast_fp16)[name = string("qk_3_cast_fp16")]; + tensor var_158_cast_fp16 = softmax(axis = var_67, x = qk_3_cast_fp16)[name = string("op_158_cast_fp16")]; + bool var_160_transpose_x_0 = const()[name = string("op_160_transpose_x_0"), val = bool(false)]; + bool var_160_transpose_y_0 = const()[name = string("op_160_transpose_y_0"), val = bool(false)]; + tensor v_5_cast_fp16 = transpose(perm = var_151, x = var_150_cast_fp16)[name = string("transpose_80")]; + tensor var_160_cast_fp16 = matmul(transpose_x = var_160_transpose_x_0, transpose_y = var_160_transpose_y_0, x = var_158_cast_fp16, y = v_5_cast_fp16)[name = string("op_160_cast_fp16")]; + tensor var_161 = const()[name = string("op_161"), val = tensor([0, 2, 1, 3])]; + tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, -1, 384])]; + tensor var_162_cast_fp16 = transpose(perm = var_161, x = var_160_cast_fp16)[name = string("transpose_77")]; + tensor x_7_cast_fp16 = reshape(shape = concat_17x, x = var_162_cast_fp16)[name = string("x_7_cast_fp16")]; + tensor var_166_to_fp16 = const()[name = string("op_166_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41467136)))]; + tensor var_167_to_fp16 = const()[name = string("op_167_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762112)))]; + tensor linear_3_cast_fp16 = linear(bias = var_167_to_fp16, weight = var_166_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")]; + tensor var_174_axes_0 = const()[name = string("op_174_axes_0"), val = tensor([-1])]; + tensor blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762944)))]; + tensor blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41763776)))]; + tensor var_174_cast_fp16 = layer_norm(axes = var_174_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_174_cast_fp16")]; + tensor var_183_to_fp16 = const()[name = string("op_183_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41764608)))]; + tensor var_184_to_fp16 = const()[name = string("op_184_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42059584)))]; + tensor linear_4_cast_fp16 = linear(bias = var_184_to_fp16, weight = var_183_to_fp16, x = var_174_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor concat_18 = const()[name = string("concat_18"), val = tensor([0, 0, 0])]; + tensor concat_19 = const()[name = string("concat_19"), val = tensor([0, 1500, 0])]; + tensor k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42060416)))]; + tensor k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_20 = const()[name = string("concat_20"), val = tensor([0, 0, 0])]; + tensor concat_21 = const()[name = string("concat_21"), val = tensor([0, 1500, 0])]; + tensor v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")]; + tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 6, 64])]; + tensor var_204_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_204_cast_fp16")]; + tensor const_22_to_fp16 = const()[name = string("const_22_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_204_cast_fp16, y = const_22_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_210 = const()[name = string("op_210"), val = tensor([1, 1500, 6, -1])]; + tensor var_211_cast_fp16 = reshape(shape = var_210, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_211_cast_fp16")]; + tensor const_23_to_fp16 = const()[name = string("const_23_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_9_cast_fp16 = mul(x = var_211_cast_fp16, y = const_23_to_fp16)[name = string("k_9_cast_fp16")]; + tensor var_217 = const()[name = string("op_217"), val = tensor([1, 1500, 6, -1])]; + tensor var_218_cast_fp16 = reshape(shape = var_217, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_218_cast_fp16")]; + tensor var_219 = const()[name = string("op_219"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_36 = transpose(perm = transpose_36_perm_0, x = k_9_cast_fp16)[name = string("transpose_74")]; + tensor transpose_35 = transpose(perm = transpose_35_perm_0, x = q_7_cast_fp16)[name = string("transpose_75")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_35, y = transpose_36)[name = string("qk_5_cast_fp16")]; + tensor var_223_cast_fp16 = softmax(axis = var_67, x = qk_5_cast_fp16)[name = string("op_223_cast_fp16")]; + bool var_225_transpose_x_0 = const()[name = string("op_225_transpose_x_0"), val = bool(false)]; + bool var_225_transpose_y_0 = const()[name = string("op_225_transpose_y_0"), val = bool(false)]; + tensor v_9_cast_fp16 = transpose(perm = var_219, x = var_218_cast_fp16)[name = string("transpose_76")]; + tensor var_225_cast_fp16 = matmul(transpose_x = var_225_transpose_x_0, transpose_y = var_225_transpose_y_0, x = var_223_cast_fp16, y = v_9_cast_fp16)[name = string("op_225_cast_fp16")]; + tensor var_226 = const()[name = string("op_226"), val = tensor([0, 2, 1, 3])]; + tensor concat_23x = const()[name = string("concat_23x"), val = tensor([1, -1, 384])]; + tensor var_227_cast_fp16 = transpose(perm = var_226, x = var_225_cast_fp16)[name = string("transpose_73")]; + tensor x_13_cast_fp16 = reshape(shape = concat_23x, x = var_227_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_231_to_fp16 = const()[name = string("op_231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43212480)))]; + tensor var_232_to_fp16 = const()[name = string("op_232_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43507456)))]; + tensor linear_5_cast_fp16 = linear(bias = var_232_to_fp16, weight = var_231_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")]; + tensor var_239_axes_0 = const()[name = string("op_239_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43508288)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509120)))]; + tensor var_239_cast_fp16 = layer_norm(axes = var_239_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_239_cast_fp16")]; + tensor var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509952)))]; + tensor var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44689664)))]; + tensor linear_6_cast_fp16 = linear(bias = var_249_to_fp16, weight = var_248_to_fp16, x = var_239_cast_fp16)[name = string("linear_6_cast_fp16")]; + string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")]; + tensor x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")]; + tensor var_254_to_fp16 = const()[name = string("op_254_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44692800)))]; + tensor var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45872512)))]; + tensor linear_7_cast_fp16 = linear(bias = var_255_to_fp16, weight = var_254_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")]; + tensor k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor([2, 1, 448, 384])]; + tensor k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_8)[name = string("k_cache_5_cast_fp16")]; + tensor v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor([2, 1, 448, 384])]; + tensor v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_9)[name = string("v_cache_5_cast_fp16")]; + tensor k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor([2, 1, 1500, 384])]; + tensor k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")]; + tensor v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor([2, 1, 1500, 384])]; + tensor v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")]; + int32 var_277 = const()[name = string("op_277"), val = int32(-1)]; + tensor var_295_axes_0 = const()[name = string("op_295_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45873344)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45874176)))]; + fp16 var_283_to_fp16 = const()[name = string("op_283_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_295_cast_fp16 = layer_norm(axes = var_295_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_295_cast_fp16")]; + tensor var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45875008)))]; + tensor var_307_to_fp16 = const()[name = string("op_307_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46169984)))]; + tensor linear_8_cast_fp16 = linear(bias = var_307_to_fp16, weight = var_306_to_fp16, x = var_295_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46170816)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_310_to_fp16, x = var_295_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor var_314_to_fp16 = const()[name = string("op_314_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46465792)))]; + tensor var_315_to_fp16 = const()[name = string("op_315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46760768)))]; + tensor linear_10_cast_fp16 = linear(bias = var_315_to_fp16, weight = var_314_to_fp16, x = var_295_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor var_317_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_317_shape_cast_fp16")]; + int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)]; + int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)]; + bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)]; + string var_317_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_317_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)]; + tensor var_317_shape_cast_fp16_to_uint16 = cast(dtype = var_317_shape_cast_fp16_to_uint16_dtype_0, x = var_317_shape_cast_fp16)[name = string("cast_52")]; + uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_317_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")]; + string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_51")]; + int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")]; + tensor expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor([0])]; + tensor expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor([0])]; + tensor expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor([0])]; + tensor expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")]; + tensor concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor([1])]; + int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)]; + bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)]; + tensor concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")]; + tensor concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor([0])]; + tensor concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor([0])]; + tensor concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor([0])]; + int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)]; + bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)]; + tensor concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")]; + tensor k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_8)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_10 = read_state(input = k_cache1)[name = string("coreml_update_state_10")]; + tensor v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_9)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_11 = read_state(input = v_cache1)[name = string("coreml_update_state_11")]; + int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)]; + int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(384)]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")]; + tensor var_333_begin_0 = const()[name = string("op_333_begin_0"), val = tensor([0, 0, 0])]; + tensor var_333_end_mask_0 = const()[name = string("op_333_end_mask_0"), val = tensor([true, false, true])]; + tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = concat_32, end_mask = var_333_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_333_cast_fp16")]; + tensor var_336_begin_0 = const()[name = string("op_336_begin_0"), val = tensor([0, 0, 0])]; + tensor var_336_end_mask_0 = const()[name = string("op_336_end_mask_0"), val = tensor([true, false, true])]; + tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = concat_32, end_mask = var_336_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_336_cast_fp16")]; + tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, -1, 6, 64])]; + tensor var_346_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_346_cast_fp16")]; + tensor const_24_to_fp16 = const()[name = string("const_24_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_346_cast_fp16, y = const_24_to_fp16)[name = string("q_11_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, -1, 6, 64])]; + tensor var_353_cast_fp16 = reshape(shape = concat_35x, x = var_333_cast_fp16)[name = string("op_353_cast_fp16")]; + tensor const_25_to_fp16 = const()[name = string("const_25_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_353_cast_fp16, y = const_25_to_fp16)[name = string("k_15_cast_fp16")]; + tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, -1, 6, 64])]; + tensor var_360_cast_fp16 = reshape(shape = concat_36x, x = var_336_cast_fp16)[name = string("op_360_cast_fp16")]; + tensor var_361 = const()[name = string("op_361"), val = tensor([0, 2, 1, 3])]; + bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)]; + bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)]; + tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_38 = transpose(perm = transpose_38_perm_0, x = k_15_cast_fp16)[name = string("transpose_70")]; + tensor transpose_37 = transpose(perm = transpose_37_perm_0, x = q_11_cast_fp16)[name = string("transpose_71")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_37, y = transpose_38)[name = string("qk_7_cast_fp16")]; + int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")]; + tensor var_364_begin_0 = const()[name = string("op_364_begin_0"), val = tensor([0, 0])]; + tensor var_364_end_mask_0 = const()[name = string("op_364_end_mask_0"), val = tensor([false, true])]; + tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = concat_37, end_mask = var_364_end_mask_0, x = mask_to_fp16)[name = string("op_364_cast_fp16")]; + int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)]; + int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; + bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; + tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")]; + tensor var_365_begin_0 = const()[name = string("op_365_begin_0"), val = tensor([0, 0])]; + tensor var_365_end_mask_0 = const()[name = string("op_365_end_mask_0"), val = tensor([true, false])]; + tensor var_365_cast_fp16 = slice_by_index(begin = var_365_begin_0, end = concat_38, end_mask = var_365_end_mask_0, x = var_364_cast_fp16)[name = string("op_365_cast_fp16")]; + tensor qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_365_cast_fp16)[name = string("qk_9_cast_fp16")]; + tensor var_368_cast_fp16 = softmax(axis = var_277, x = qk_9_cast_fp16)[name = string("op_368_cast_fp16")]; + bool var_370_transpose_x_0 = const()[name = string("op_370_transpose_x_0"), val = bool(false)]; + bool var_370_transpose_y_0 = const()[name = string("op_370_transpose_y_0"), val = bool(false)]; + tensor v_15_cast_fp16 = transpose(perm = var_361, x = var_360_cast_fp16)[name = string("transpose_72")]; + tensor var_370_cast_fp16 = matmul(transpose_x = var_370_transpose_x_0, transpose_y = var_370_transpose_y_0, x = var_368_cast_fp16, y = v_15_cast_fp16)[name = string("op_370_cast_fp16")]; + tensor var_371 = const()[name = string("op_371"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 384])]; + tensor var_372_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_69")]; + tensor x_25_cast_fp16 = reshape(shape = concat_39x, x = var_372_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_376_to_fp16 = const()[name = string("op_376_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46761600)))]; + tensor var_377_to_fp16 = const()[name = string("op_377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47056576)))]; + tensor linear_11_cast_fp16 = linear(bias = var_377_to_fp16, weight = var_376_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")]; + tensor var_384_axes_0 = const()[name = string("op_384_axes_0"), val = tensor([-1])]; + tensor blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47057408)))]; + tensor blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47058240)))]; + tensor var_384_cast_fp16 = layer_norm(axes = var_384_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_384_cast_fp16")]; + tensor var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47059072)))]; + tensor var_394_to_fp16 = const()[name = string("op_394_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354048)))]; + tensor linear_12_cast_fp16 = linear(bias = var_394_to_fp16, weight = var_393_to_fp16, x = var_384_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor concat_40 = const()[name = string("concat_40"), val = tensor([0, 0, 0])]; + tensor concat_41 = const()[name = string("concat_41"), val = tensor([0, 1500, 0])]; + tensor k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_42 = const()[name = string("concat_42"), val = tensor([0, 0, 0])]; + tensor concat_43 = const()[name = string("concat_43"), val = tensor([0, 1500, 0])]; + tensor v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")]; + tensor concat_44x = const()[name = string("concat_44x"), val = tensor([1, -1, 6, 64])]; + tensor var_414_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_414_cast_fp16")]; + tensor const_26_to_fp16 = const()[name = string("const_26_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_414_cast_fp16, y = const_26_to_fp16)[name = string("q_15_cast_fp16")]; + tensor var_420 = const()[name = string("op_420"), val = tensor([1, 1500, 6, -1])]; + tensor var_421_cast_fp16 = reshape(shape = var_420, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_421_cast_fp16")]; + tensor const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_421_cast_fp16, y = const_27_to_fp16)[name = string("k_19_cast_fp16")]; + tensor var_427 = const()[name = string("op_427"), val = tensor([1, 1500, 6, -1])]; + tensor var_428_cast_fp16 = reshape(shape = var_427, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_428_cast_fp16")]; + tensor var_429 = const()[name = string("op_429"), val = tensor([0, 2, 1, 3])]; + bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)]; + bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)]; + tensor transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_40 = transpose(perm = transpose_40_perm_0, x = k_19_cast_fp16)[name = string("transpose_66")]; + tensor transpose_39 = transpose(perm = transpose_39_perm_0, x = q_15_cast_fp16)[name = string("transpose_67")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_39, y = transpose_40)[name = string("qk_11_cast_fp16")]; + tensor var_433_cast_fp16 = softmax(axis = var_277, x = qk_11_cast_fp16)[name = string("op_433_cast_fp16")]; + bool var_435_transpose_x_0 = const()[name = string("op_435_transpose_x_0"), val = bool(false)]; + bool var_435_transpose_y_0 = const()[name = string("op_435_transpose_y_0"), val = bool(false)]; + tensor v_19_cast_fp16 = transpose(perm = var_429, x = var_428_cast_fp16)[name = string("transpose_68")]; + tensor var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_0, transpose_y = var_435_transpose_y_0, x = var_433_cast_fp16, y = v_19_cast_fp16)[name = string("op_435_cast_fp16")]; + tensor var_436 = const()[name = string("op_436"), val = tensor([0, 2, 1, 3])]; + tensor concat_45x = const()[name = string("concat_45x"), val = tensor([1, -1, 384])]; + tensor var_437_cast_fp16 = transpose(perm = var_436, x = var_435_cast_fp16)[name = string("transpose_65")]; + tensor x_31_cast_fp16 = reshape(shape = concat_45x, x = var_437_cast_fp16)[name = string("x_31_cast_fp16")]; + tensor var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354880)))]; + tensor var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47649856)))]; + tensor linear_13_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")]; + tensor var_449_axes_0 = const()[name = string("op_449_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47650688)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47651520)))]; + tensor var_449_cast_fp16 = layer_norm(axes = var_449_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_449_cast_fp16")]; + tensor var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47652352)))]; + tensor var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48832064)))]; + tensor linear_14_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = var_449_cast_fp16)[name = string("linear_14_cast_fp16")]; + string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")]; + tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_464_to_fp16 = const()[name = string("op_464_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48835200)))]; + tensor var_465_to_fp16 = const()[name = string("op_465_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50014912)))]; + tensor linear_15_cast_fp16 = linear(bias = var_465_to_fp16, weight = var_464_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")]; + tensor k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor([3, 1, 448, 384])]; + tensor k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_10)[name = string("k_cache_9_cast_fp16")]; + tensor v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor([3, 1, 448, 384])]; + tensor v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_11)[name = string("v_cache_9_cast_fp16")]; + tensor k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor([3, 1, 1500, 384])]; + tensor k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")]; + tensor v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor([3, 1, 1500, 384])]; + tensor v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")]; + int32 var_487 = const()[name = string("op_487"), val = int32(-1)]; + tensor var_505_axes_0 = const()[name = string("op_505_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50015744)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50016576)))]; + fp16 var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_505_cast_fp16 = layer_norm(axes = var_505_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_505_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50017408)))]; + tensor var_517_to_fp16 = const()[name = string("op_517_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50312384)))]; + tensor linear_16_cast_fp16 = linear(bias = var_517_to_fp16, weight = var_516_to_fp16, x = var_505_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = string("op_520_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50313216)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_520_to_fp16, x = var_505_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50608192)))]; + tensor var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50903168)))]; + tensor linear_18_cast_fp16 = linear(bias = var_525_to_fp16, weight = var_524_to_fp16, x = var_505_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_527_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_527_shape_cast_fp16")]; + int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)]; + int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)]; + bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)]; + string var_527_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_527_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)]; + tensor var_527_shape_cast_fp16_to_uint16 = cast(dtype = var_527_shape_cast_fp16_to_uint16_dtype_0, x = var_527_shape_cast_fp16)[name = string("cast_50")]; + uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_527_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")]; + string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_49")]; + int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")]; + tensor expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor([0])]; + tensor expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor([0])]; + tensor expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor([0])]; + tensor expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")]; + tensor concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor([2])]; + int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)]; + bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)]; + tensor concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")]; + tensor concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor([0])]; + tensor concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor([0])]; + tensor concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor([0])]; + int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)]; + bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)]; + tensor concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")]; + tensor k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_10)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")]; + tensor v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_11)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")]; + int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)]; + int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(384)]; + int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; + bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; + tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")]; + tensor var_543_begin_0 = const()[name = string("op_543_begin_0"), val = tensor([0, 0, 0])]; + tensor var_543_end_mask_0 = const()[name = string("op_543_end_mask_0"), val = tensor([true, false, true])]; + tensor var_543_cast_fp16 = slice_by_index(begin = var_543_begin_0, end = concat_54, end_mask = var_543_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_543_cast_fp16")]; + tensor var_546_begin_0 = const()[name = string("op_546_begin_0"), val = tensor([0, 0, 0])]; + tensor var_546_end_mask_0 = const()[name = string("op_546_end_mask_0"), val = tensor([true, false, true])]; + tensor var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = concat_54, end_mask = var_546_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_546_cast_fp16")]; + tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 6, 64])]; + tensor var_556_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_556_cast_fp16")]; + tensor const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_556_cast_fp16, y = const_28_to_fp16)[name = string("q_19_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 6, 64])]; + tensor var_563_cast_fp16 = reshape(shape = concat_57x, x = var_543_cast_fp16)[name = string("op_563_cast_fp16")]; + tensor const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_25_cast_fp16 = mul(x = var_563_cast_fp16, y = const_29_to_fp16)[name = string("k_25_cast_fp16")]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 6, 64])]; + tensor var_570_cast_fp16 = reshape(shape = concat_58x, x = var_546_cast_fp16)[name = string("op_570_cast_fp16")]; + tensor var_571 = const()[name = string("op_571"), val = tensor([0, 2, 1, 3])]; + bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)]; + bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)]; + tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_42 = transpose(perm = transpose_42_perm_0, x = k_25_cast_fp16)[name = string("transpose_62")]; + tensor transpose_41 = transpose(perm = transpose_41_perm_0, x = q_19_cast_fp16)[name = string("transpose_63")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_41, y = transpose_42)[name = string("qk_13_cast_fp16")]; + int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)]; + int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)]; + bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)]; + tensor concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")]; + tensor var_574_begin_0 = const()[name = string("op_574_begin_0"), val = tensor([0, 0])]; + tensor var_574_end_mask_0 = const()[name = string("op_574_end_mask_0"), val = tensor([false, true])]; + tensor var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = concat_59, end_mask = var_574_end_mask_0, x = mask_to_fp16)[name = string("op_574_cast_fp16")]; + int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)]; + int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)]; + bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)]; + tensor concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")]; + tensor var_575_begin_0 = const()[name = string("op_575_begin_0"), val = tensor([0, 0])]; + tensor var_575_end_mask_0 = const()[name = string("op_575_end_mask_0"), val = tensor([true, false])]; + tensor var_575_cast_fp16 = slice_by_index(begin = var_575_begin_0, end = concat_60, end_mask = var_575_end_mask_0, x = var_574_cast_fp16)[name = string("op_575_cast_fp16")]; + tensor qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_575_cast_fp16)[name = string("qk_15_cast_fp16")]; + tensor var_578_cast_fp16 = softmax(axis = var_487, x = qk_15_cast_fp16)[name = string("op_578_cast_fp16")]; + bool var_580_transpose_x_0 = const()[name = string("op_580_transpose_x_0"), val = bool(false)]; + bool var_580_transpose_y_0 = const()[name = string("op_580_transpose_y_0"), val = bool(false)]; + tensor v_25_cast_fp16 = transpose(perm = var_571, x = var_570_cast_fp16)[name = string("transpose_64")]; + tensor var_580_cast_fp16 = matmul(transpose_x = var_580_transpose_x_0, transpose_y = var_580_transpose_y_0, x = var_578_cast_fp16, y = v_25_cast_fp16)[name = string("op_580_cast_fp16")]; + tensor var_581 = const()[name = string("op_581"), val = tensor([0, 2, 1, 3])]; + tensor concat_61x = const()[name = string("concat_61x"), val = tensor([1, -1, 384])]; + tensor var_582_cast_fp16 = transpose(perm = var_581, x = var_580_cast_fp16)[name = string("transpose_61")]; + tensor x_43_cast_fp16 = reshape(shape = concat_61x, x = var_582_cast_fp16)[name = string("x_43_cast_fp16")]; + tensor var_586_to_fp16 = const()[name = string("op_586_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50904000)))]; + tensor var_587_to_fp16 = const()[name = string("op_587_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51198976)))]; + tensor linear_19_cast_fp16 = linear(bias = var_587_to_fp16, weight = var_586_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")]; + tensor var_594_axes_0 = const()[name = string("op_594_axes_0"), val = tensor([-1])]; + tensor blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51199808)))]; + tensor blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51200640)))]; + tensor var_594_cast_fp16 = layer_norm(axes = var_594_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_594_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51201472)))]; + tensor var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51496448)))]; + tensor linear_20_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = var_594_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor concat_62 = const()[name = string("concat_62"), val = tensor([0, 0, 0])]; + tensor concat_63 = const()[name = string("concat_63"), val = tensor([0, 1500, 0])]; + tensor k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_64 = const()[name = string("concat_64"), val = tensor([0, 0, 0])]; + tensor concat_65 = const()[name = string("concat_65"), val = tensor([0, 1500, 0])]; + tensor v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")]; + tensor concat_66x = const()[name = string("concat_66x"), val = tensor([1, -1, 6, 64])]; + tensor var_624_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_624_cast_fp16")]; + tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_624_cast_fp16, y = const_30_to_fp16)[name = string("q_23_cast_fp16")]; + tensor var_630 = const()[name = string("op_630"), val = tensor([1, 1500, 6, -1])]; + tensor var_631_cast_fp16 = reshape(shape = var_630, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_631_cast_fp16")]; + tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_29_cast_fp16 = mul(x = var_631_cast_fp16, y = const_31_to_fp16)[name = string("k_29_cast_fp16")]; + tensor var_637 = const()[name = string("op_637"), val = tensor([1, 1500, 6, -1])]; + tensor var_638_cast_fp16 = reshape(shape = var_637, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_638_cast_fp16")]; + tensor var_639 = const()[name = string("op_639"), val = tensor([0, 2, 1, 3])]; + bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)]; + bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)]; + tensor transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_44 = transpose(perm = transpose_44_perm_0, x = k_29_cast_fp16)[name = string("transpose_58")]; + tensor transpose_43 = transpose(perm = transpose_43_perm_0, x = q_23_cast_fp16)[name = string("transpose_59")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_43, y = transpose_44)[name = string("qk_17_cast_fp16")]; + tensor var_643_cast_fp16 = softmax(axis = var_487, x = qk_17_cast_fp16)[name = string("op_643_cast_fp16")]; + bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)]; + bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)]; + tensor v_29_cast_fp16 = transpose(perm = var_639, x = var_638_cast_fp16)[name = string("transpose_60")]; + tensor var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = var_643_cast_fp16, y = v_29_cast_fp16)[name = string("op_645_cast_fp16")]; + tensor var_646 = const()[name = string("op_646"), val = tensor([0, 2, 1, 3])]; + tensor concat_67x = const()[name = string("concat_67x"), val = tensor([1, -1, 384])]; + tensor var_647_cast_fp16 = transpose(perm = var_646, x = var_645_cast_fp16)[name = string("transpose_57")]; + tensor x_49_cast_fp16 = reshape(shape = concat_67x, x = var_647_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51497280)))]; + tensor var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51792256)))]; + tensor linear_21_cast_fp16 = linear(bias = var_652_to_fp16, weight = var_651_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")]; + tensor var_659_axes_0 = const()[name = string("op_659_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793088)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793920)))]; + tensor var_659_cast_fp16 = layer_norm(axes = var_659_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_659_cast_fp16")]; + tensor var_668_to_fp16 = const()[name = string("op_668_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51794752)))]; + tensor var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52974464)))]; + tensor linear_22_cast_fp16 = linear(bias = var_669_to_fp16, weight = var_668_to_fp16, x = var_659_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")]; + tensor x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")]; + tensor var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52977600)))]; + tensor var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54157312)))]; + tensor linear_23_cast_fp16 = linear(bias = var_675_to_fp16, weight = var_674_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")]; + tensor k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor([4, 1, 448, 384])]; + tensor k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_13_cast_fp16")]; + tensor v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor([4, 1, 448, 384])]; + tensor v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_13_cast_fp16")]; + tensor k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor([4, 1, 1500, 384])]; + tensor k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")]; + tensor v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor([4, 1, 1500, 384])]; + tensor v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor([false, true, true, true])]; + tensor v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")]; + int32 var_697 = const()[name = string("op_697"), val = int32(-1)]; + tensor var_715_axes_0 = const()[name = string("op_715_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158144)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158976)))]; + fp16 var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_715_cast_fp16 = layer_norm(axes = var_715_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_715_cast_fp16")]; + tensor var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54159808)))]; + tensor var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54454784)))]; + tensor linear_24_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = var_715_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor var_730_to_fp16 = const()[name = string("op_730_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54455616)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_730_to_fp16, x = var_715_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54750592)))]; + tensor var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55045568)))]; + tensor linear_26_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = var_715_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor var_737_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_737_shape_cast_fp16")]; + int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)]; + int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)]; + bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)]; + string var_737_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_737_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)]; + tensor var_737_shape_cast_fp16_to_uint16 = cast(dtype = var_737_shape_cast_fp16_to_uint16_dtype_0, x = var_737_shape_cast_fp16)[name = string("cast_48")]; + uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_737_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")]; + string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_47")]; + int32 end_step = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor([0])]; + tensor expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor([0])]; + tensor expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step)[name = string("expand_dims_51")]; + tensor concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor([3])]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")]; + tensor concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor([0])]; + tensor concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor([0])]; + tensor concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor([0])]; + int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; + bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; + tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")]; + tensor k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")]; + tensor v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1])]; + tensor v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, true])]; + tensor v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false])]; + tensor v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")]; + write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")]; + int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)]; + int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(384)]; + int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; + bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; + tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step, concat_76_values2_0))[name = string("concat_76")]; + tensor var_753_begin_0 = const()[name = string("op_753_begin_0"), val = tensor([0, 0, 0])]; + tensor var_753_end_mask_0 = const()[name = string("op_753_end_mask_0"), val = tensor([true, false, true])]; + tensor var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = concat_76, end_mask = var_753_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_753_cast_fp16")]; + tensor var_756_begin_0 = const()[name = string("op_756_begin_0"), val = tensor([0, 0, 0])]; + tensor var_756_end_mask_0 = const()[name = string("op_756_end_mask_0"), val = tensor([true, false, true])]; + tensor var_756_cast_fp16 = slice_by_index(begin = var_756_begin_0, end = concat_76, end_mask = var_756_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_756_cast_fp16")]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 6, 64])]; + tensor var_766_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_766_cast_fp16")]; + tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_766_cast_fp16, y = const_32_to_fp16)[name = string("q_27_cast_fp16")]; + tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 6, 64])]; + tensor var_773_cast_fp16 = reshape(shape = concat_79x, x = var_753_cast_fp16)[name = string("op_773_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_773_cast_fp16, y = const_33_to_fp16)[name = string("k_35_cast_fp16")]; + tensor concat_80x = const()[name = string("concat_80x"), val = tensor([1, -1, 6, 64])]; + tensor var_780_cast_fp16 = reshape(shape = concat_80x, x = var_756_cast_fp16)[name = string("op_780_cast_fp16")]; + tensor var_781 = const()[name = string("op_781"), val = tensor([0, 2, 1, 3])]; + bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)]; + bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)]; + tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_46 = transpose(perm = transpose_46_perm_0, x = k_35_cast_fp16)[name = string("transpose_54")]; + tensor transpose_45 = transpose(perm = transpose_45_perm_0, x = q_27_cast_fp16)[name = string("transpose_55")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_45, y = transpose_46)[name = string("qk_19_cast_fp16")]; + int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")]; + tensor var_784_begin_0 = const()[name = string("op_784_begin_0"), val = tensor([0, 0])]; + tensor var_784_end_mask_0 = const()[name = string("op_784_end_mask_0"), val = tensor([false, true])]; + tensor var_784_cast_fp16 = slice_by_index(begin = var_784_begin_0, end = concat_81, end_mask = var_784_end_mask_0, x = mask_to_fp16)[name = string("op_784_cast_fp16")]; + int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)]; + int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; + bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; + tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")]; + tensor var_785_begin_0 = const()[name = string("op_785_begin_0"), val = tensor([0, 0])]; + tensor var_785_end_mask_0 = const()[name = string("op_785_end_mask_0"), val = tensor([true, false])]; + tensor var_785_cast_fp16 = slice_by_index(begin = var_785_begin_0, end = concat_82, end_mask = var_785_end_mask_0, x = var_784_cast_fp16)[name = string("op_785_cast_fp16")]; + tensor qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_785_cast_fp16)[name = string("qk_21_cast_fp16")]; + tensor var_788_cast_fp16 = softmax(axis = var_697, x = qk_21_cast_fp16)[name = string("op_788_cast_fp16")]; + bool var_790_transpose_x_0 = const()[name = string("op_790_transpose_x_0"), val = bool(false)]; + bool var_790_transpose_y_0 = const()[name = string("op_790_transpose_y_0"), val = bool(false)]; + tensor v_35_cast_fp16 = transpose(perm = var_781, x = var_780_cast_fp16)[name = string("transpose_56")]; + tensor var_790_cast_fp16 = matmul(transpose_x = var_790_transpose_x_0, transpose_y = var_790_transpose_y_0, x = var_788_cast_fp16, y = v_35_cast_fp16)[name = string("op_790_cast_fp16")]; + tensor var_791 = const()[name = string("op_791"), val = tensor([0, 2, 1, 3])]; + tensor concat_83x = const()[name = string("concat_83x"), val = tensor([1, -1, 384])]; + tensor var_792_cast_fp16 = transpose(perm = var_791, x = var_790_cast_fp16)[name = string("transpose_53")]; + tensor x_61_cast_fp16 = reshape(shape = concat_83x, x = var_792_cast_fp16)[name = string("x_61_cast_fp16")]; + tensor var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55046400)))]; + tensor var_797_to_fp16 = const()[name = string("op_797_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55341376)))]; + tensor linear_27_cast_fp16 = linear(bias = var_797_to_fp16, weight = var_796_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")]; + tensor var_804_axes_0 = const()[name = string("op_804_axes_0"), val = tensor([-1])]; + tensor blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55342208)))]; + tensor blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343040)))]; + tensor var_804_cast_fp16 = layer_norm(axes = var_804_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_804_cast_fp16")]; + tensor var_813_to_fp16 = const()[name = string("op_813_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343872)))]; + tensor var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55638848)))]; + tensor linear_28_cast_fp16 = linear(bias = var_814_to_fp16, weight = var_813_to_fp16, x = var_804_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor concat_84 = const()[name = string("concat_84"), val = tensor([0, 0, 0])]; + tensor concat_85 = const()[name = string("concat_85"), val = tensor([0, 1500, 0])]; + tensor k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_86 = const()[name = string("concat_86"), val = tensor([0, 0, 0])]; + tensor concat_87 = const()[name = string("concat_87"), val = tensor([0, 1500, 0])]; + tensor v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1])]; + tensor v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor([true, false, true])]; + tensor v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([false, false, false])]; + tensor v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")]; + tensor concat_88x = const()[name = string("concat_88x"), val = tensor([1, -1, 6, 64])]; + tensor var_834_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_834_cast_fp16")]; + tensor const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_834_cast_fp16, y = const_34_to_fp16)[name = string("q_cast_fp16")]; + tensor var_840 = const()[name = string("op_840"), val = tensor([1, 1500, 6, -1])]; + tensor var_841_cast_fp16 = reshape(shape = var_840, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_841_cast_fp16")]; + tensor const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_841_cast_fp16, y = const_35_to_fp16)[name = string("k_cast_fp16")]; + tensor var_847 = const()[name = string("op_847"), val = tensor([1, 1500, 6, -1])]; + tensor var_848_cast_fp16 = reshape(shape = var_847, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_848_cast_fp16")]; + tensor var_849 = const()[name = string("op_849"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = k_cast_fp16)[name = string("transpose_50")]; + tensor transpose_47 = transpose(perm = transpose_47_perm_0, x = q_cast_fp16)[name = string("transpose_51")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_47, y = transpose_48)[name = string("qk_cast_fp16")]; + tensor var_853_cast_fp16 = softmax(axis = var_697, x = qk_cast_fp16)[name = string("op_853_cast_fp16")]; + bool var_855_transpose_x_0 = const()[name = string("op_855_transpose_x_0"), val = bool(false)]; + bool var_855_transpose_y_0 = const()[name = string("op_855_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_849, x = var_848_cast_fp16)[name = string("transpose_52")]; + tensor var_855_cast_fp16 = matmul(transpose_x = var_855_transpose_x_0, transpose_y = var_855_transpose_y_0, x = var_853_cast_fp16, y = v_cast_fp16)[name = string("op_855_cast_fp16")]; + tensor var_856 = const()[name = string("op_856"), val = tensor([0, 2, 1, 3])]; + tensor concat_89x = const()[name = string("concat_89x"), val = tensor([1, -1, 384])]; + tensor var_857_cast_fp16 = transpose(perm = var_856, x = var_855_cast_fp16)[name = string("transpose_49")]; + tensor x_67_cast_fp16 = reshape(shape = concat_89x, x = var_857_cast_fp16)[name = string("x_67_cast_fp16")]; + tensor var_861_to_fp16 = const()[name = string("op_861_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55639680)))]; + tensor var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55934656)))]; + tensor linear_29_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")]; + tensor var_869_axes_0 = const()[name = string("op_869_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55935488)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55936320)))]; + tensor var_869_cast_fp16 = layer_norm(axes = var_869_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_869_cast_fp16")]; + tensor var_878_to_fp16 = const()[name = string("op_878_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55937152)))]; + tensor var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57116864)))]; + tensor linear_30_cast_fp16 = linear(bias = var_879_to_fp16, weight = var_878_to_fp16, x = var_869_cast_fp16)[name = string("linear_30_cast_fp16")]; + string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")]; + tensor x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")]; + tensor var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57120000)))]; + tensor var_885_to_fp16 = const()[name = string("op_885_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58299712)))]; + tensor linear_31_cast_fp16 = linear(bias = var_885_to_fp16, weight = var_884_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")]; + tensor var_898_axes_0 = const()[name = string("op_898_axes_0"), val = tensor([-1])]; + tensor ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58300544)))]; + tensor ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58301376)))]; + fp16 var_889_to_fp16 = const()[name = string("op_889_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_898_cast_fp16 = layer_norm(axes = var_898_axes_0, beta = ln_bias_to_fp16, epsilon = var_889_to_fp16, gamma = ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_898_cast_fp16")]; + tensor var_908_bias_0_to_fp16 = const()[name = string("op_908_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58302208)))]; + tensor logits = linear(bias = var_908_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_898_cast_fp16)[name = string("op_908_cast_fp16")]; + } -> (logits); +} \ No newline at end of file diff --git a/tiny/decoder_second.mlmodelc/weights/weight.bin b/tiny/decoder_second.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e6d7af3645a778a74ca9a4c32ca95f8726e841b --- /dev/null +++ b/tiny/decoder_second.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4929371005f27adeec8f37f375227b8128a408fdd9200141e55bcb60b15ecd +size 58406002 diff --git a/tiny/encoder.mlmodelc/analytics/coremldata.bin b/tiny/encoder.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..c039acf268199f7868ea911d5bfaf2980a0a085f --- /dev/null +++ b/tiny/encoder.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380e64d472e815b7206b93221ca71457f93ca000336ae30076e221b957869230 +size 243 diff --git a/tiny/encoder.mlmodelc/coremldata.bin b/tiny/encoder.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..c562185638877041b6e19bba7a231c397aab67df --- /dev/null +++ b/tiny/encoder.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0e4ad833efbbc0cff4791bce7b7852af5d36a9f32212d93cf63a454e89733b +size 318 diff --git a/tiny/encoder.mlmodelc/metadata.json b/tiny/encoder.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..96e3fa505bf060e5217b13131e775ff07cdf3cc2 --- /dev/null +++ b/tiny/encoder.mlmodelc/metadata.json @@ -0,0 +1,69 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1500 × 384)", + "shortDescription" : "", + "shape" : "[1, 1500, 384]", + "name" : "output", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 9, + "mlProgramOperationTypeHistogram" : { + "Ios18.mul" : 8, + "Ios18.softmax" : 4, + "Ios18.linear" : 24, + "Ios18.gelu" : 6, + "Ios18.layerNorm" : 9, + "Ios18.transpose" : 17, + "Ios18.matmul" : 8, + "Ios18.conv" : 2, + "Ios18.add" : 9, + "Ios18.reshape" : 16 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "stateSchema" : [ + + ], + "availability" : { + "macOS" : "15.0", + "tvOS" : "18.0", + "visionOS" : "2.0", + "watchOS" : "11.0", + "iOS" : "18.0", + "macCatalyst" : "18.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.version" : "8.0", + "com.github.apple.coremltools.source" : "torch==2.4.1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 80 × 3000)", + "shortDescription" : "", + "shape" : "[1, 80, 3000]", + "name" : "logmel_data", + "type" : "MultiArray" + } + ], + "generatedClassName" : "encoder", + "method" : "predict" + } +] \ No newline at end of file diff --git a/tiny/encoder.mlmodelc/model.mil b/tiny/encoder.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..a714d7f89a7b947d00e60c58452d3233aa2c3fd2 --- /dev/null +++ b/tiny/encoder.mlmodelc/model.mil @@ -0,0 +1,268 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})] +{ + func main(tensor logmel_data) { + string var_28_pad_type_0 = const()[name = string("op_28_pad_type_0"), val = string("custom")]; + tensor var_28_pad_0 = const()[name = string("op_28_pad_0"), val = tensor([1, 1])]; + tensor var_28_strides_0 = const()[name = string("op_28_strides_0"), val = tensor([1])]; + tensor var_28_dilations_0 = const()[name = string("op_28_dilations_0"), val = tensor([1])]; + int32 var_28_groups_0 = const()[name = string("op_28_groups_0"), val = int32(1)]; + tensor weight_3_to_fp16 = const()[name = string("weight_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + tensor bias_3_to_fp16 = const()[name = string("bias_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184448)))]; + tensor var_28_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_28_dilations_0, groups = var_28_groups_0, pad = var_28_pad_0, pad_type = var_28_pad_type_0, strides = var_28_strides_0, weight = weight_3_to_fp16, x = logmel_data)[name = string("op_28_cast_fp16")]; + string input_1_mode_0 = const()[name = string("input_1_mode_0"), val = string("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_28_cast_fp16)[name = string("input_1_cast_fp16")]; + string var_46_pad_type_0 = const()[name = string("op_46_pad_type_0"), val = string("custom")]; + tensor var_46_pad_0 = const()[name = string("op_46_pad_0"), val = tensor([1, 1])]; + tensor var_46_strides_0 = const()[name = string("op_46_strides_0"), val = tensor([2])]; + tensor var_46_dilations_0 = const()[name = string("op_46_dilations_0"), val = tensor([1])]; + int32 var_46_groups_0 = const()[name = string("op_46_groups_0"), val = int32(1)]; + tensor weight_7_to_fp16 = const()[name = string("weight_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185280)))]; + tensor bias_7_to_fp16 = const()[name = string("bias_7_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070080)))]; + tensor var_46_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_46_dilations_0, groups = var_46_groups_0, pad = var_46_pad_0, pad_type = var_46_pad_type_0, strides = var_46_strides_0, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = string("op_46_cast_fp16")]; + string x_3_mode_0 = const()[name = string("x_3_mode_0"), val = string("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_46_cast_fp16)[name = string("x_3_cast_fp16")]; + tensor var_52 = const()[name = string("op_52"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070912)))]; + tensor x_5_cast_fp16 = transpose(perm = var_52, x = x_3_cast_fp16)[name = string("transpose_40")]; + tensor var_55_cast_fp16 = add(x = x_5_cast_fp16, y = positional_embedding_to_fp16)[name = string("op_55_cast_fp16")]; + int32 var_67 = const()[name = string("op_67"), val = int32(-1)]; + tensor var_83_axes_0 = const()[name = string("op_83_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2222976)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2223808)))]; + fp16 var_73_to_fp16 = const()[name = string("op_73_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_83_cast_fp16 = layer_norm(axes = var_83_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_55_cast_fp16)[name = string("op_83_cast_fp16")]; + tensor var_94_to_fp16 = const()[name = string("op_94_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2224640)))]; + tensor var_95_to_fp16 = const()[name = string("op_95_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2519616)))]; + tensor linear_0_cast_fp16 = linear(bias = var_95_to_fp16, weight = var_94_to_fp16, x = var_83_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor var_98_to_fp16 = const()[name = string("op_98_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2520448)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2815424)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_98_to_fp16, x = var_83_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor var_102_to_fp16 = const()[name = string("op_102_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2816256)))]; + tensor var_103_to_fp16 = const()[name = string("op_103_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3111232)))]; + tensor linear_2_cast_fp16 = linear(bias = var_103_to_fp16, weight = var_102_to_fp16, x = var_83_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor var_111 = const()[name = string("op_111"), val = tensor([1, 1500, 6, -1])]; + tensor var_112_cast_fp16 = reshape(shape = var_111, x = linear_0_cast_fp16)[name = string("op_112_cast_fp16")]; + tensor const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_112_cast_fp16, y = const_28_to_fp16)[name = string("q_3_cast_fp16")]; + tensor var_118 = const()[name = string("op_118"), val = tensor([1, 1500, 6, -1])]; + tensor var_119_cast_fp16 = reshape(shape = var_118, x = linear_1_cast_fp16)[name = string("op_119_cast_fp16")]; + tensor const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_119_cast_fp16, y = const_29_to_fp16)[name = string("k_3_cast_fp16")]; + tensor var_125 = const()[name = string("op_125"), val = tensor([1, 1500, 6, -1])]; + tensor var_126_cast_fp16 = reshape(shape = var_125, x = linear_2_cast_fp16)[name = string("op_126_cast_fp16")]; + tensor var_127 = const()[name = string("op_127"), val = tensor([0, 2, 1, 3])]; + bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)]; + bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)]; + tensor transpose_16_perm_0 = const()[name = string("transpose_16_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_17 = transpose(perm = transpose_17_perm_0, x = k_3_cast_fp16)[name = string("transpose_37")]; + tensor transpose_16 = transpose(perm = transpose_16_perm_0, x = q_3_cast_fp16)[name = string("transpose_38")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_16, y = transpose_17)[name = string("qk_1_cast_fp16")]; + tensor var_131_cast_fp16 = softmax(axis = var_67, x = qk_1_cast_fp16)[name = string("op_131_cast_fp16")]; + bool var_133_transpose_x_0 = const()[name = string("op_133_transpose_x_0"), val = bool(false)]; + bool var_133_transpose_y_0 = const()[name = string("op_133_transpose_y_0"), val = bool(false)]; + tensor v_3_cast_fp16 = transpose(perm = var_127, x = var_126_cast_fp16)[name = string("transpose_39")]; + tensor var_133_cast_fp16 = matmul(transpose_x = var_133_transpose_x_0, transpose_y = var_133_transpose_y_0, x = var_131_cast_fp16, y = v_3_cast_fp16)[name = string("op_133_cast_fp16")]; + tensor var_134 = const()[name = string("op_134"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = string("concat_0"), val = tensor([1, 1500, 384])]; + tensor var_135_cast_fp16 = transpose(perm = var_134, x = var_133_cast_fp16)[name = string("transpose_36")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = var_135_cast_fp16)[name = string("x_11_cast_fp16")]; + tensor var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3112064)))]; + tensor var_140_to_fp16 = const()[name = string("op_140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3407040)))]; + tensor linear_3_cast_fp16 = linear(bias = var_140_to_fp16, weight = var_139_to_fp16, x = x_11_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_55_cast_fp16, y = linear_3_cast_fp16)[name = string("x_13_cast_fp16")]; + tensor var_147_axes_0 = const()[name = string("op_147_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3407872)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3408704)))]; + tensor var_147_cast_fp16 = layer_norm(axes = var_147_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = string("op_147_cast_fp16")]; + tensor var_156_to_fp16 = const()[name = string("op_156_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3409536)))]; + tensor var_157_to_fp16 = const()[name = string("op_157_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4589248)))]; + tensor linear_4_cast_fp16 = linear(bias = var_157_to_fp16, weight = var_156_to_fp16, x = var_147_cast_fp16)[name = string("linear_4_cast_fp16")]; + string x_17_mode_0 = const()[name = string("x_17_mode_0"), val = string("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = string("x_17_cast_fp16")]; + tensor var_162_to_fp16 = const()[name = string("op_162_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4592384)))]; + tensor var_163_to_fp16 = const()[name = string("op_163_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5772096)))]; + tensor linear_5_cast_fp16 = linear(bias = var_163_to_fp16, weight = var_162_to_fp16, x = x_17_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = string("x_19_cast_fp16")]; + int32 var_172 = const()[name = string("op_172"), val = int32(-1)]; + tensor var_188_axes_0 = const()[name = string("op_188_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5772928)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5773760)))]; + fp16 var_178_to_fp16 = const()[name = string("op_178_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_188_cast_fp16 = layer_norm(axes = var_188_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_178_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = string("op_188_cast_fp16")]; + tensor var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5774592)))]; + tensor var_200_to_fp16 = const()[name = string("op_200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6069568)))]; + tensor linear_6_cast_fp16 = linear(bias = var_200_to_fp16, weight = var_199_to_fp16, x = var_188_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor var_203_to_fp16 = const()[name = string("op_203_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6070400)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_203_to_fp16, x = var_188_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor var_207_to_fp16 = const()[name = string("op_207_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6365376)))]; + tensor var_208_to_fp16 = const()[name = string("op_208_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6660352)))]; + tensor linear_8_cast_fp16 = linear(bias = var_208_to_fp16, weight = var_207_to_fp16, x = var_188_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor var_216 = const()[name = string("op_216"), val = tensor([1, 1500, 6, -1])]; + tensor var_217_cast_fp16 = reshape(shape = var_216, x = linear_6_cast_fp16)[name = string("op_217_cast_fp16")]; + tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_217_cast_fp16, y = const_30_to_fp16)[name = string("q_7_cast_fp16")]; + tensor var_223 = const()[name = string("op_223"), val = tensor([1, 1500, 6, -1])]; + tensor var_224_cast_fp16 = reshape(shape = var_223, x = linear_7_cast_fp16)[name = string("op_224_cast_fp16")]; + tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_224_cast_fp16, y = const_31_to_fp16)[name = string("k_7_cast_fp16")]; + tensor var_230 = const()[name = string("op_230"), val = tensor([1, 1500, 6, -1])]; + tensor var_231_cast_fp16 = reshape(shape = var_230, x = linear_8_cast_fp16)[name = string("op_231_cast_fp16")]; + tensor var_232 = const()[name = string("op_232"), val = tensor([0, 2, 1, 3])]; + bool qk_3_transpose_x_0 = const()[name = string("qk_3_transpose_x_0"), val = bool(false)]; + bool qk_3_transpose_y_0 = const()[name = string("qk_3_transpose_y_0"), val = bool(false)]; + tensor transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_19 = transpose(perm = transpose_19_perm_0, x = k_7_cast_fp16)[name = string("transpose_33")]; + tensor transpose_18 = transpose(perm = transpose_18_perm_0, x = q_7_cast_fp16)[name = string("transpose_34")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_18, y = transpose_19)[name = string("qk_3_cast_fp16")]; + tensor var_236_cast_fp16 = softmax(axis = var_172, x = qk_3_cast_fp16)[name = string("op_236_cast_fp16")]; + bool var_238_transpose_x_0 = const()[name = string("op_238_transpose_x_0"), val = bool(false)]; + bool var_238_transpose_y_0 = const()[name = string("op_238_transpose_y_0"), val = bool(false)]; + tensor v_7_cast_fp16 = transpose(perm = var_232, x = var_231_cast_fp16)[name = string("transpose_35")]; + tensor var_238_cast_fp16 = matmul(transpose_x = var_238_transpose_x_0, transpose_y = var_238_transpose_y_0, x = var_236_cast_fp16, y = v_7_cast_fp16)[name = string("op_238_cast_fp16")]; + tensor var_239 = const()[name = string("op_239"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = string("concat_1"), val = tensor([1, 1500, 384])]; + tensor var_240_cast_fp16 = transpose(perm = var_239, x = var_238_cast_fp16)[name = string("transpose_32")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = var_240_cast_fp16)[name = string("x_23_cast_fp16")]; + tensor var_244_to_fp16 = const()[name = string("op_244_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6661184)))]; + tensor var_245_to_fp16 = const()[name = string("op_245_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6956160)))]; + tensor linear_9_cast_fp16 = linear(bias = var_245_to_fp16, weight = var_244_to_fp16, x = x_23_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = string("x_25_cast_fp16")]; + tensor var_252_axes_0 = const()[name = string("op_252_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6956992)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6957824)))]; + tensor var_252_cast_fp16 = layer_norm(axes = var_252_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_178_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = string("op_252_cast_fp16")]; + tensor var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6958656)))]; + tensor var_262_to_fp16 = const()[name = string("op_262_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8138368)))]; + tensor linear_10_cast_fp16 = linear(bias = var_262_to_fp16, weight = var_261_to_fp16, x = var_252_cast_fp16)[name = string("linear_10_cast_fp16")]; + string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = string("x_29_cast_fp16")]; + tensor var_267_to_fp16 = const()[name = string("op_267_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8141504)))]; + tensor var_268_to_fp16 = const()[name = string("op_268_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9321216)))]; + tensor linear_11_cast_fp16 = linear(bias = var_268_to_fp16, weight = var_267_to_fp16, x = x_29_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = string("x_31_cast_fp16")]; + int32 var_277 = const()[name = string("op_277"), val = int32(-1)]; + tensor var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9322048)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9322880)))]; + fp16 var_283_to_fp16 = const()[name = string("op_283_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_293_cast_fp16 = layer_norm(axes = var_293_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = string("op_293_cast_fp16")]; + tensor var_304_to_fp16 = const()[name = string("op_304_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9323712)))]; + tensor var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9618688)))]; + tensor linear_12_cast_fp16 = linear(bias = var_305_to_fp16, weight = var_304_to_fp16, x = var_293_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor var_308_to_fp16 = const()[name = string("op_308_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9619520)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_308_to_fp16, x = var_293_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor var_312_to_fp16 = const()[name = string("op_312_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9914496)))]; + tensor var_313_to_fp16 = const()[name = string("op_313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10209472)))]; + tensor linear_14_cast_fp16 = linear(bias = var_313_to_fp16, weight = var_312_to_fp16, x = var_293_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor var_321 = const()[name = string("op_321"), val = tensor([1, 1500, 6, -1])]; + tensor var_322_cast_fp16 = reshape(shape = var_321, x = linear_12_cast_fp16)[name = string("op_322_cast_fp16")]; + tensor const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_11_cast_fp16 = mul(x = var_322_cast_fp16, y = const_32_to_fp16)[name = string("q_11_cast_fp16")]; + tensor var_328 = const()[name = string("op_328"), val = tensor([1, 1500, 6, -1])]; + tensor var_329_cast_fp16 = reshape(shape = var_328, x = linear_13_cast_fp16)[name = string("op_329_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_329_cast_fp16, y = const_33_to_fp16)[name = string("k_11_cast_fp16")]; + tensor var_335 = const()[name = string("op_335"), val = tensor([1, 1500, 6, -1])]; + tensor var_336_cast_fp16 = reshape(shape = var_335, x = linear_14_cast_fp16)[name = string("op_336_cast_fp16")]; + tensor var_337 = const()[name = string("op_337"), val = tensor([0, 2, 1, 3])]; + bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)]; + bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)]; + tensor transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_21 = transpose(perm = transpose_21_perm_0, x = k_11_cast_fp16)[name = string("transpose_29")]; + tensor transpose_20 = transpose(perm = transpose_20_perm_0, x = q_11_cast_fp16)[name = string("transpose_30")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_20, y = transpose_21)[name = string("qk_5_cast_fp16")]; + tensor var_341_cast_fp16 = softmax(axis = var_277, x = qk_5_cast_fp16)[name = string("op_341_cast_fp16")]; + bool var_343_transpose_x_0 = const()[name = string("op_343_transpose_x_0"), val = bool(false)]; + bool var_343_transpose_y_0 = const()[name = string("op_343_transpose_y_0"), val = bool(false)]; + tensor v_11_cast_fp16 = transpose(perm = var_337, x = var_336_cast_fp16)[name = string("transpose_31")]; + tensor var_343_cast_fp16 = matmul(transpose_x = var_343_transpose_x_0, transpose_y = var_343_transpose_y_0, x = var_341_cast_fp16, y = v_11_cast_fp16)[name = string("op_343_cast_fp16")]; + tensor var_344 = const()[name = string("op_344"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = string("concat_2"), val = tensor([1, 1500, 384])]; + tensor var_345_cast_fp16 = transpose(perm = var_344, x = var_343_cast_fp16)[name = string("transpose_28")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = var_345_cast_fp16)[name = string("x_35_cast_fp16")]; + tensor var_349_to_fp16 = const()[name = string("op_349_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10210304)))]; + tensor var_350_to_fp16 = const()[name = string("op_350_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10505280)))]; + tensor linear_15_cast_fp16 = linear(bias = var_350_to_fp16, weight = var_349_to_fp16, x = x_35_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = string("x_37_cast_fp16")]; + tensor var_357_axes_0 = const()[name = string("op_357_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10506112)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10506944)))]; + tensor var_357_cast_fp16 = layer_norm(axes = var_357_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = string("op_357_cast_fp16")]; + tensor var_366_to_fp16 = const()[name = string("op_366_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10507776)))]; + tensor var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11687488)))]; + tensor linear_16_cast_fp16 = linear(bias = var_367_to_fp16, weight = var_366_to_fp16, x = var_357_cast_fp16)[name = string("linear_16_cast_fp16")]; + string x_41_mode_0 = const()[name = string("x_41_mode_0"), val = string("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = string("x_41_cast_fp16")]; + tensor var_372_to_fp16 = const()[name = string("op_372_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11690624)))]; + tensor var_373_to_fp16 = const()[name = string("op_373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12870336)))]; + tensor linear_17_cast_fp16 = linear(bias = var_373_to_fp16, weight = var_372_to_fp16, x = x_41_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = string("x_43_cast_fp16")]; + int32 var_382 = const()[name = string("op_382"), val = int32(-1)]; + tensor var_398_axes_0 = const()[name = string("op_398_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12871168)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12872000)))]; + fp16 var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_398_cast_fp16 = layer_norm(axes = var_398_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_388_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = string("op_398_cast_fp16")]; + tensor var_409_to_fp16 = const()[name = string("op_409_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12872832)))]; + tensor var_410_to_fp16 = const()[name = string("op_410_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13167808)))]; + tensor linear_18_cast_fp16 = linear(bias = var_410_to_fp16, weight = var_409_to_fp16, x = var_398_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_413_to_fp16 = const()[name = string("op_413_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13168640)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_413_to_fp16, x = var_398_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor var_417_to_fp16 = const()[name = string("op_417_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13463616)))]; + tensor var_418_to_fp16 = const()[name = string("op_418_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13758592)))]; + tensor linear_20_cast_fp16 = linear(bias = var_418_to_fp16, weight = var_417_to_fp16, x = var_398_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor var_426 = const()[name = string("op_426"), val = tensor([1, 1500, 6, -1])]; + tensor var_427_cast_fp16 = reshape(shape = var_426, x = linear_18_cast_fp16)[name = string("op_427_cast_fp16")]; + tensor const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_427_cast_fp16, y = const_34_to_fp16)[name = string("q_cast_fp16")]; + tensor var_433 = const()[name = string("op_433"), val = tensor([1, 1500, 6, -1])]; + tensor var_434_cast_fp16 = reshape(shape = var_433, x = linear_19_cast_fp16)[name = string("op_434_cast_fp16")]; + tensor const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_434_cast_fp16, y = const_35_to_fp16)[name = string("k_cast_fp16")]; + tensor var_440 = const()[name = string("op_440"), val = tensor([1, 1500, 6, -1])]; + tensor var_441_cast_fp16 = reshape(shape = var_440, x = linear_20_cast_fp16)[name = string("op_441_cast_fp16")]; + tensor var_442 = const()[name = string("op_442"), val = tensor([0, 2, 1, 3])]; + bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)]; + bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)]; + tensor transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_23 = transpose(perm = transpose_23_perm_0, x = k_cast_fp16)[name = string("transpose_25")]; + tensor transpose_22 = transpose(perm = transpose_22_perm_0, x = q_cast_fp16)[name = string("transpose_26")]; + tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_22, y = transpose_23)[name = string("qk_cast_fp16")]; + tensor var_446_cast_fp16 = softmax(axis = var_382, x = qk_cast_fp16)[name = string("op_446_cast_fp16")]; + bool var_448_transpose_x_0 = const()[name = string("op_448_transpose_x_0"), val = bool(false)]; + bool var_448_transpose_y_0 = const()[name = string("op_448_transpose_y_0"), val = bool(false)]; + tensor v_cast_fp16 = transpose(perm = var_442, x = var_441_cast_fp16)[name = string("transpose_27")]; + tensor var_448_cast_fp16 = matmul(transpose_x = var_448_transpose_x_0, transpose_y = var_448_transpose_y_0, x = var_446_cast_fp16, y = v_cast_fp16)[name = string("op_448_cast_fp16")]; + tensor var_449 = const()[name = string("op_449"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = string("concat_3"), val = tensor([1, 1500, 384])]; + tensor var_450_cast_fp16 = transpose(perm = var_449, x = var_448_cast_fp16)[name = string("transpose_24")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = var_450_cast_fp16)[name = string("x_47_cast_fp16")]; + tensor var_454_to_fp16 = const()[name = string("op_454_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13759424)))]; + tensor var_455_to_fp16 = const()[name = string("op_455_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14054400)))]; + tensor linear_21_cast_fp16 = linear(bias = var_455_to_fp16, weight = var_454_to_fp16, x = x_47_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = string("x_49_cast_fp16")]; + tensor var_462_axes_0 = const()[name = string("op_462_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14055232)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14056064)))]; + tensor var_462_cast_fp16 = layer_norm(axes = var_462_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_388_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = string("op_462_cast_fp16")]; + tensor var_471_to_fp16 = const()[name = string("op_471_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14056896)))]; + tensor var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15236608)))]; + tensor linear_22_cast_fp16 = linear(bias = var_472_to_fp16, weight = var_471_to_fp16, x = var_462_cast_fp16)[name = string("linear_22_cast_fp16")]; + string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = string("x_53_cast_fp16")]; + tensor var_477_to_fp16 = const()[name = string("op_477_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15239744)))]; + tensor var_478_to_fp16 = const()[name = string("op_478_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16419456)))]; + tensor linear_23_cast_fp16 = linear(bias = var_478_to_fp16, weight = var_477_to_fp16, x = x_53_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = string("x_cast_fp16")]; + tensor var_491_axes_0 = const()[name = string("op_491_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = string("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16420288)))]; + tensor ln_post_bias_to_fp16 = const()[name = string("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16421120)))]; + fp16 var_482_to_fp16 = const()[name = string("op_482_to_fp16"), val = fp16(0x1.5p-17)]; + tensor output = layer_norm(axes = var_491_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_482_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = string("op_491_cast_fp16")]; + } -> (output); +} \ No newline at end of file diff --git a/tiny/encoder.mlmodelc/weights/weight.bin b/tiny/encoder.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e98f9000570b03c2ece1fcc966a3cd12fcfab05 --- /dev/null +++ b/tiny/encoder.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4efa9bb81afaf12ac6d7cf7a3a4ba1e6b92f05f96ae77fd55cf725e2ecd3a5fd +size 16421952 diff --git a/tiny/model_dims.json b/tiny/model_dims.json new file mode 100644 index 0000000000000000000000000000000000000000..811192d21c045ca7d97eb76fe25e783bea0802f0 --- /dev/null +++ b/tiny/model_dims.json @@ -0,0 +1,12 @@ +{ + "n_mels": 80, + "n_audio_ctx": 1500, + "n_audio_state": 384, + "n_audio_head": 6, + "n_audio_layer": 4, + "n_vocab": 51865, + "n_text_ctx": 448, + "n_text_state": 384, + "n_text_head": 6, + "n_text_layer": 4 +} \ No newline at end of file diff --git a/whisper_convert.py b/whisper_convert.py new file mode 100755 index 0000000000000000000000000000000000000000..0a3bc6498bd33c9a00cca4167f10823b1043f3bc --- /dev/null +++ b/whisper_convert.py @@ -0,0 +1,434 @@ +#!/usr/bin/env python3 + +from whisper import whisper + +import torch +from torch import Tensor, nn +import torch.nn.functional as F +from typing import Optional, Iterable +from dataclasses import dataclass +import json + +@dataclass +class ModelDimensions: + n_mels: int + n_audio_ctx: int + n_audio_state: int + n_audio_head: int + n_audio_layer: int + n_vocab: int + n_text_ctx: int + n_text_state: int + n_text_head: int + n_text_layer: int + +class LayerNorm(nn.LayerNorm): + def forward(self, x: Tensor) -> Tensor: + return super().forward(x.float()).type(x.dtype) + + +class Linear(nn.Linear): + def forward(self, x: Tensor) -> Tensor: + return F.linear( + x, + self.weight.to(x.dtype), + None if self.bias is None else self.bias.to(x.dtype), + ) + +class Conv1d(nn.Conv1d): + def _conv_forward( + self, x: Tensor, weight: Tensor, bias: Optional[Tensor] + ) -> Tensor: + return super()._conv_forward( + x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype) + ) + +class MultiHeadAttention(nn.Module): + def __init__(self, n_state: int, n_head: int, no_cross: bool = False, cross_only: bool = False): + super().__init__() + self.no_cross = no_cross + self.cross_only = cross_only + self.n_head = n_head + if not cross_only: + self.query = Linear(n_state, n_state) + self.out = Linear(n_state, n_state) + if not no_cross: + self.key = Linear(n_state, n_state, bias=False) + self.value = Linear(n_state, n_state) + + def forward( + self, + x: Tensor, + mask: Optional[Tensor] = None, + k_cache: Optional[Tensor] = None, + v_cache: Optional[Tensor] = None, + offset: Optional[int] = None, + ): + if self.cross_only: + k = self.key(x) + v = self.value(x) + k_len = k.shape[-2] + k_cache[:,:k_len,:] = k + v_len = v.shape[-2] + v_cache[:,:v_len,:] = v + return x + + q = self.query(x) + + if self.no_cross: + k = torch.zeros_like(k_cache) + k_len = k.shape[-2] + k[:,:k_len,:] = k_cache + v = torch.zeros_like(v_cache) + v_len = v.shape[-2] + v[:,:k_len,:] = v_cache + + else: + k = self.key(x) + v = self.value(x) + + q_len = q.shape[-2] + end_step = offset + q_len + + k_cache[:, offset:end_step, :] = k + v_cache[:, offset:end_step, :] = v + + k = k_cache[:, :end_step, :] + v = v_cache[:, :end_step, :] + + wv = self.qkv_attention(q, k, v, mask) + return self.out(wv) + + def qkv_attention( + self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None + ): + n_batch, n_ctx, n_state = q.shape + scale = (n_state // self.n_head) ** -0.25 + q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) * scale + k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 3, 1) * scale + v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) + + qk = q @ k + if mask is not None: + qk = qk + mask[:n_ctx, :n_ctx] + qk = qk.float() + + w = F.softmax(qk, dim=-1).to(q.dtype) + return (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2) + +class ResidualAttentionBlock(nn.Module): + def __init__(self, n_state: int, n_head: int, cross_attention: bool = False, cross_only: bool = False): + super().__init__() + self.cross_only = cross_only + if cross_only: + self.cross_attn = ( + MultiHeadAttention(n_state, n_head, cross_only=True) + ) + else: + self.attn = MultiHeadAttention(n_state, n_head) + self.attn_ln = LayerNorm(n_state) + + self.cross_attn = ( + MultiHeadAttention(n_state, n_head, no_cross=True) if cross_attention else None + ) + self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None + + n_mlp = n_state * 4 + self.mlp = nn.Sequential( + Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state) + ) + self.mlp_ln = LayerNorm(n_state) + + def forward( + self, + x: Tensor, + offset: Optional[int] = None, + mask: Optional[Tensor] = None, + k_cache1: Optional[Tensor] = None, + v_cache1: Optional[Tensor] = None, + k_cache2: Optional[Tensor] = None, + v_cache2: Optional[Tensor] = None, + ): + if self.cross_only: + x = self.cross_attn(x, k_cache=k_cache2, v_cache=v_cache2) + else: + x = x + self.attn(self.attn_ln(x), mask=mask, k_cache=k_cache1, v_cache=v_cache1, offset=offset) + if self.cross_attn: + x = x + self.cross_attn(self.cross_attn_ln(x), k_cache=k_cache2, v_cache=v_cache2) + x = x + self.mlp(self.mlp_ln(x)) + return x + +class TextDecoder_first(nn.Module): + def __init__( + self, n_batch: int, n_vocab: int, n_text_ctx: int, n_audio_ctx: int, n_state: int, n_head: int, n_layer: int + ): + super().__init__() + + self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList( + [ + ResidualAttentionBlock(n_state, n_head, cross_attention=True, cross_only=True) + for _ in range(n_layer) + ] + ) + + self.kvcache_shape1 = (n_layer, n_batch, n_text_ctx, n_state) + self.kvcache_shape2 = (n_layer, n_batch, n_audio_ctx, n_state) + self.register_buffer("k_cache1", torch.zeros(self.kvcache_shape1)) + self.register_buffer("v_cache1", torch.zeros(self.kvcache_shape1)) + self.register_buffer("k_cache2", torch.zeros(self.kvcache_shape2)) + self.register_buffer("v_cache2", torch.zeros(self.kvcache_shape2)) + + def forward(self, xa: Tensor): + """ + xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state) + the encoded audio features to be attended on + """ + self.k_cache1[:,:,:,:] = 0 + self.v_cache1[:,:,:,:] = 0 + x = xa + for i, block in enumerate(self.blocks): + x = block(x, k_cache2=self.k_cache2[i], v_cache2=self.v_cache2[i]) + + return x + + +class TextDecoder_second(nn.Module): + def __init__( + self, n_batch: int, n_vocab: int, n_text_ctx: int, n_audio_ctx: int, n_state: int, n_head: int, n_layer: int + ): + super().__init__() + + self.token_embedding = nn.Embedding(n_vocab, n_state) + self.positional_embedding = nn.Parameter(torch.empty(n_text_ctx, n_state)) + + self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList( + [ + ResidualAttentionBlock(n_state, n_head, cross_attention=True) + for _ in range(n_layer) + ] + ) + self.ln = LayerNorm(n_state) + + mask = torch.empty(n_text_ctx, n_text_ctx).fill_(-np.inf).triu_(1) + self.register_buffer("mask", mask, persistent=False) + + self.kvcache_shape1 = (n_layer, n_batch, n_text_ctx, n_state) + self.kvcache_shape2 = (n_layer, n_batch, n_audio_ctx, n_state) + self.register_buffer("k_cache1", torch.zeros(self.kvcache_shape1)) + self.register_buffer("v_cache1", torch.zeros(self.kvcache_shape1)) + self.register_buffer("k_cache2", torch.zeros(self.kvcache_shape2)) + self.register_buffer("v_cache2", torch.zeros(self.kvcache_shape2)) + + def forward(self, x: Tensor, offset_mask: Tensor): + """ + x : torch.LongTensor, shape = (batch_size, <= n_ctx) + the text tokens + xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state) + the encoded audio features to be attended on + """ + end_step = offset_mask.shape[-1] + offset = end_step - x.shape[-1] + x = ( + self.token_embedding(x) + + self.positional_embedding[offset:end_step] + ) + + for i, block in enumerate(self.blocks): + x = block(x, offset=offset, mask=self.mask, k_cache1=self.k_cache1[i], v_cache1=self.v_cache1[i], k_cache2=self.k_cache2[i], v_cache2=self.v_cache2[i]) + + x = self.ln(x) + logits = ( + x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1) + ).float() + + return logits + +import numpy as np +import coremltools as ct + +def converter_encoder(model: whisper.Whisper, split: bool = False): + model.eval() + encoder = model.encoder + hparams = model.dims + + input_shape = (1, hparams.n_mels, 3000) + input_data = torch.randn(input_shape) + traced_model = torch.jit.trace(encoder, input_data) + + coreml_model = ct.convert( + traced_model, + inputs=[ct.TensorType(name="logmel_data", shape=input_shape)], + outputs=[ct.TensorType(name="output")], + minimum_deployment_target=ct.target.iOS18, + ) + coreml_model.save("encoder.mlpackage") + + if split: + ct.models.utils.bisect_model( + coreml_model, + "./encoder/", + merge_chunks_to_pipeline=True, + ) + del coreml_model + +def converter_decoder(model: whisper.Whisper): + model.eval() + org_decoder = model.decoder + hparams = model.dims + + batch_size = 1 + decoder1 = TextDecoder_first( + batch_size, + hparams.n_vocab, + hparams.n_text_ctx, + hparams.n_audio_ctx, + hparams.n_text_state, + hparams.n_text_head, + hparams.n_text_layer, + ) + + decoder1.load_state_dict(org_decoder.state_dict(), strict=False) + decoder1.eval() + + tokens_shape = (batch_size, 1) + audio_shape = (batch_size, hparams.n_audio_ctx, hparams.n_audio_state) + + audio_data = torch.randn(audio_shape) + traced_model1 = torch.jit.trace(decoder1, [audio_data]) + + audio_length = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_audio_ctx, default=1) + inputs = [ + ct.TensorType(shape=(batch_size, audio_length, hparams.n_audio_state), dtype=np.float16, name="audio_data"), + ] + outputs = [ct.TensorType(dtype=np.float16, name="dummy")] + states = [ + ct.StateType( + wrapped_type=ct.TensorType( + shape=decoder1.kvcache_shape1, dtype=np.float16 + ), + name="k_cache1", + ), + ct.StateType( + wrapped_type=ct.TensorType( + shape=decoder1.kvcache_shape1, dtype=np.float16 + ), + name="v_cache1", + ), + ct.StateType( + wrapped_type=ct.TensorType( + shape=decoder1.kvcache_shape2, dtype=np.float16 + ), + name="k_cache2", + ), + ct.StateType( + wrapped_type=ct.TensorType( + shape=decoder1.kvcache_shape2, dtype=np.float16 + ), + name="v_cache2", + ), + ] + + converted_model = ct.convert( + traced_model1, + inputs=inputs, + outputs=outputs, + states=states, + minimum_deployment_target=ct.target.iOS18, + ) + converted_model.save("decoder_first.mlpackage") + del traced_model1 + del converted_model + + decoder2 = TextDecoder_second( + batch_size, + hparams.n_vocab, + hparams.n_text_ctx, + hparams.n_audio_ctx, + hparams.n_text_state, + hparams.n_text_head, + hparams.n_text_layer, + ) + + decoder2.load_state_dict(org_decoder.state_dict(), strict=False) + decoder2.eval() + + token_data = torch.randint(hparams.n_vocab, tokens_shape).long() + offset_mask = torch.zeros(tokens_shape) + traced_model2 = torch.jit.trace(decoder2, [token_data, offset_mask]) + + query_length = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_text_ctx, default=1) + end_step_dim = ct.RangeDim(lower_bound=1, upper_bound=hparams.n_text_ctx, default=1) + inputs = [ + ct.TensorType(shape=(batch_size, query_length), dtype=np.int32, name="token_data"), + ct.TensorType(shape=(batch_size, end_step_dim), dtype=np.float16, name="offset_mask"), + ] + outputs = [ct.TensorType(dtype=np.float16, name="logits")] + + converted_model = ct.convert( + traced_model2, + inputs=inputs, + outputs=outputs, + states=states, + minimum_deployment_target=ct.target.iOS18, + ) + converted_model.save("decoder_second.mlpackage") + del traced_model2 + del converted_model + +def test_model(hparams: ModelDimensions): + logmel_shape = (1, hparams.n_mels, 3000) + + encoder = ct.models.MLModel("encoder.mlpackage") + encoder_output = encoder.predict({'logmel_data': np.random.rand(*logmel_shape)}) + audio_data = encoder_output['output'] + + decoder1 = ct.models.MLModel("decoder_first.mlpackage") + decoder2 = ct.models.MLModel("decoder_second.mlpackage") + decoder_state = decoder1.make_state() + decoder_input = { + 'audio_data': audio_data, + } + decoder_output = decoder1.predict(decoder_input, decoder_state) + + past_kv_len = 0 + token_data = np.random.randint(hparams.n_vocab, size=(1, 5), dtype=np.int32) + offset_mask = np.zeros((1, past_kv_len + 5)) + decoder_input = { + 'token_data': token_data, + 'offset_mask': offset_mask, + } + decoder_output = decoder2.predict(decoder_input, decoder_state) + print(decoder_output) + past_kv_len += 5 + + while past_kv_len + 1 < hparams.n_text_ctx: + token_data = np.random.randint(hparams.n_vocab, size=(1, 1), dtype=np.int32) + offset_mask = np.zeros((1, past_kv_len + 1)) + decoder_input = { + 'token_data': token_data, + 'offset_mask': offset_mask, + } + decoder_output = decoder2.predict(decoder_input, decoder_state) + print(decoder_output) + past_kv_len += 1 + +def print_dims(model: whisper.Whisper): + with open('model_dims.json', 'w') as f: + json.dump(model.dims.__dict__, f, indent=2) + +if __name__=='__main__': + import os + os.makedirs("work", exist_ok=True) + os.chdir("work") + for model_size in ['tiny','base','small','medium','large-v2','large-v3']: + print(model_size) + os.makedirs(model_size, exist_ok=True) + os.chdir(model_size) + model = whisper.load_model(model_size) + print_dims(model) + converter_encoder(model, split=model_size.startswith('large')) + converter_decoder(model) + # test_model(model.dims) + del model + os.chdir("..") + os.chdir("..")