program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.4.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
{
    func main<ios18>(state<tensor<fp16, [4, 1, 448, 384]>> k_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> k_cache2, tensor<fp16, [1, ?]> offset_mask, tensor<int32, [1, ?]> token_data, state<tensor<fp16, [4, 1, 448, 384]>> v_cache1, state<tensor<fp16, [4, 1, 1500, 384]>> v_cache2) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"offset_mask", [1, 1]}, {"token_data", [1, 1]}}), ("RangeDims", {{"offset_mask", [[1, 1], [1, 448]]}, {"token_data", [[1, 1], [1, 448]]}})))] {
            tensor<int32, [2]> var_22_shape_cast_fp16 = shape(x = offset_mask)[name = string("op_22_shape_cast_fp16")];
            int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)];
            int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)];
            bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)];
            string var_22_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_22_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")];
            uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(1)];
            tensor<int16, [2]> var_22_shape_cast_fp16_to_int16 = cast(dtype = var_22_shape_cast_fp16_to_int16_dtype_0, x = var_22_shape_cast_fp16)[name = string("cast_58")];
            int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_22_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")];
            string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            tensor<int32, [2]> var_26_shape = shape(x = token_data)[name = string("op_26_shape")];
            int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)];
            int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)];
            bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)];
            string var_26_shape_to_uint16_dtype_0 = const()[name = string("op_26_shape_to_uint16_dtype_0"), val = string("uint16")];
            uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)];
            tensor<uint16, [2]> var_26_shape_to_uint16 = cast(dtype = var_26_shape_to_uint16_dtype_0, x = var_26_shape)[name = string("cast_56")];
            uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_26_shape_to_uint16)[name = string("gather_1_cast_uint16")];
            string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_55")];
            int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_57")];
            int32 offset = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("offset")];
            int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)];
            int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)];
            bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)];
            tensor<fp16, [51865, 384]> token_embedding_weight_to_fp16 = const()[name = string("token_embedding_weight_to_fp16"), val = tensor<fp16, [51865, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
            tensor<fp16, [1, ?, 384]> var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = token_data, validate_indices = var_42_validate_indices_0, x = token_embedding_weight_to_fp16)[name = string("op_42_cast_fp16")];
            int32 concat_0_values1_0 = const()[name = string("concat_0_values1_0"), val = int32(0)];
            int32 concat_0_axis_0 = const()[name = string("concat_0_axis_0"), val = int32(0)];
            bool concat_0_interleave_0 = const()[name = string("concat_0_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (offset, concat_0_values1_0))[name = string("concat_0")];
            int32 concat_1_values1_0 = const()[name = string("concat_1_values1_0"), val = int32(384)];
            int32 concat_1_axis_0 = const()[name = string("concat_1_axis_0"), val = int32(0)];
            bool concat_1_interleave_0 = const()[name = string("concat_1_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0_cast_uint16_to_int32, concat_1_values1_0))[name = string("concat_1")];
            tensor<bool, [2]> var_45_end_mask_0 = const()[name = string("op_45_end_mask_0"), val = tensor<bool, [2]>([false, true])];
            tensor<fp16, [448, 384]> positional_embedding_to_fp16 = const()[name = string("positional_embedding_to_fp16"), val = tensor<fp16, [448, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39832448)))];
            tensor<fp16, [?, ?]> var_45_cast_fp16 = slice_by_index(begin = concat_0, end = concat_1, end_mask = var_45_end_mask_0, x = positional_embedding_to_fp16)[name = string("op_45_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_3_cast_fp16 = add(x = var_42_cast_fp16, y = var_45_cast_fp16)[name = string("x_3_cast_fp16")];
            tensor<fp16, [4, 1, 448, 384]> read_state_0 = read_state(input = k_cache1)[name = string("read_state_0")];
            tensor<int32, [4]> k_cache_1_begin_0 = const()[name = string("k_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> k_cache_1_end_0 = const()[name = string("k_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 384])];
            tensor<bool, [4]> k_cache_1_end_mask_0 = const()[name = string("k_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_1_squeeze_mask_0 = const()[name = string("k_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> k_cache_1_cast_fp16 = slice_by_index(begin = k_cache_1_begin_0, end = k_cache_1_end_0, end_mask = k_cache_1_end_mask_0, squeeze_mask = k_cache_1_squeeze_mask_0, x = read_state_0)[name = string("k_cache_1_cast_fp16")];
            tensor<fp16, [4, 1, 448, 384]> read_state_1 = read_state(input = v_cache1)[name = string("read_state_1")];
            tensor<int32, [4]> v_cache_1_begin_0 = const()[name = string("v_cache_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> v_cache_1_end_0 = const()[name = string("v_cache_1_end_0"), val = tensor<int32, [4]>([1, 1, 448, 384])];
            tensor<bool, [4]> v_cache_1_end_mask_0 = const()[name = string("v_cache_1_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_1_squeeze_mask_0 = const()[name = string("v_cache_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> v_cache_1_cast_fp16 = slice_by_index(begin = v_cache_1_begin_0, end = v_cache_1_end_0, end_mask = v_cache_1_end_mask_0, squeeze_mask = v_cache_1_squeeze_mask_0, x = read_state_1)[name = string("v_cache_1_cast_fp16")];
            tensor<fp16, [4, 1, 1500, 384]> read_state_2 = read_state(input = k_cache2)[name = string("read_state_2")];
            tensor<int32, [4]> k_cache_3_begin_0 = const()[name = string("k_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> k_cache_3_end_0 = const()[name = string("k_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 384])];
            tensor<bool, [4]> k_cache_3_end_mask_0 = const()[name = string("k_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_3_squeeze_mask_0 = const()[name = string("k_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_cache_3_cast_fp16 = slice_by_index(begin = k_cache_3_begin_0, end = k_cache_3_end_0, end_mask = k_cache_3_end_mask_0, squeeze_mask = k_cache_3_squeeze_mask_0, x = read_state_2)[name = string("k_cache_3_cast_fp16")];
            tensor<fp16, [4, 1, 1500, 384]> read_state_3 = read_state(input = v_cache2)[name = string("read_state_3")];
            tensor<int32, [4]> v_cache_3_begin_0 = const()[name = string("v_cache_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> v_cache_3_end_0 = const()[name = string("v_cache_3_end_0"), val = tensor<int32, [4]>([1, 1, 1500, 384])];
            tensor<bool, [4]> v_cache_3_end_mask_0 = const()[name = string("v_cache_3_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_3_squeeze_mask_0 = const()[name = string("v_cache_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_cache_3_cast_fp16 = slice_by_index(begin = v_cache_3_begin_0, end = v_cache_3_end_0, end_mask = v_cache_3_end_mask_0, squeeze_mask = v_cache_3_squeeze_mask_0, x = read_state_3)[name = string("v_cache_3_cast_fp16")];
            int32 var_67 = const()[name = string("op_67"), val = int32(-1)];
            tensor<int32, [1]> var_85_axes_0 = const()[name = string("op_85_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_0_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40176576)))];
            tensor<fp16, [384]> blocks_0_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40177408)))];
            fp16 var_73_to_fp16 = const()[name = string("op_73_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, ?, 384]> var_85_cast_fp16 = layer_norm(axes = var_85_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = x_3_cast_fp16)[name = string("op_85_cast_fp16")];
            tensor<fp16, [384, 384]> var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40178240)))];
            tensor<fp16, [384]> var_97_to_fp16 = const()[name = string("op_97_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40473216)))];
            tensor<fp16, [1, ?, 384]> linear_0_cast_fp16 = linear(bias = var_97_to_fp16, weight = var_96_to_fp16, x = var_85_cast_fp16)[name = string("linear_0_cast_fp16")];
            tensor<fp16, [384, 384]> var_100_to_fp16 = const()[name = string("op_100_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40474048)))];
            tensor<fp16, [384]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769024)))];
            tensor<fp16, [1, ?, 384]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_100_to_fp16, x = var_85_cast_fp16)[name = string("linear_1_cast_fp16")];
            tensor<fp16, [384, 384]> var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40769856)))];
            tensor<fp16, [384]> var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41064832)))];
            tensor<fp16, [1, ?, 384]> linear_2_cast_fp16 = linear(bias = var_105_to_fp16, weight = var_104_to_fp16, x = var_85_cast_fp16)[name = string("linear_2_cast_fp16")];
            tensor<int32, [3]> var_107_shape_cast_fp16 = shape(x = linear_0_cast_fp16)[name = string("op_107_shape_cast_fp16")];
            int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)];
            int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)];
            bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)];
            string var_107_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_107_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
            uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)];
            tensor<uint16, [3]> var_107_shape_cast_fp16_to_uint16 = cast(dtype = var_107_shape_cast_fp16_to_uint16_dtype_0, x = var_107_shape_cast_fp16)[name = string("cast_54")];
            uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_107_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")];
            string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_53")];
            int32 end_step_3 = add(x = offset, y = gather_2_cast_uint16_to_int32)[name = string("end_step_3")];
            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_1_axes_0 = const()[name = string("expand_dims_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_1 = expand_dims(axes = expand_dims_1_axes_0, x = offset)[name = string("expand_dims_1")];
            tensor<int32, [1]> expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3_axes_0 = const()[name = string("expand_dims_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3 = expand_dims(axes = expand_dims_3_axes_0, x = end_step_3)[name = string("expand_dims_3")];
            tensor<int32, [1]> concat_4_values0_0 = const()[name = string("concat_4_values0_0"), val = tensor<int32, [1]>([0])];
            int32 concat_4_axis_0 = const()[name = string("concat_4_axis_0"), val = int32(0)];
            bool concat_4_interleave_0 = const()[name = string("concat_4_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_4 = concat(axis = concat_4_axis_0, interleave = concat_4_interleave_0, values = (concat_4_values0_0, expand_dims_0, expand_dims_1, expand_dims_2))[name = string("concat_4")];
            tensor<int32, [1]> concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_5_values1_0 = const()[name = string("concat_5_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_5_values3_0 = const()[name = string("concat_5_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)];
            bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, concat_5_values1_0, expand_dims_3, concat_5_values3_0))[name = string("concat_5")];
            tensor<int32, [4]> k_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = k_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = k_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_1_stride_0, update = linear_1_cast_fp16, x = read_state_0)[name = string("k_cache1_internal_tensor_assign_1_cast_fp16")];
            write_state(data = k_cache1_internal_tensor_assign_1_cast_fp16, input = k_cache1)[name = string("coreml_update_state_8_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_8 = read_state(input = k_cache1)[name = string("coreml_update_state_8")];
            tensor<int32, [4]> v_cache1_internal_tensor_assign_1_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_4, begin_mask = v_cache1_internal_tensor_assign_1_begin_mask_0, end = concat_5, end_mask = v_cache1_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_1_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_1_stride_0, update = linear_2_cast_fp16, x = read_state_1)[name = string("v_cache1_internal_tensor_assign_1_cast_fp16")];
            write_state(data = v_cache1_internal_tensor_assign_1_cast_fp16, input = v_cache1)[name = string("coreml_update_state_9_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_9 = read_state(input = v_cache1)[name = string("coreml_update_state_9")];
            int32 concat_10_values0_0 = const()[name = string("concat_10_values0_0"), val = int32(1)];
            int32 concat_10_values2_0 = const()[name = string("concat_10_values2_0"), val = int32(384)];
            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
            tensor<int32, [3]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (concat_10_values0_0, end_step_3, concat_10_values2_0))[name = string("concat_10")];
            tensor<int32, [3]> var_123_begin_0 = const()[name = string("op_123_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_123_end_mask_0 = const()[name = string("op_123_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_123_cast_fp16 = slice_by_index(begin = var_123_begin_0, end = concat_10, end_mask = var_123_end_mask_0, x = k_cache_1_cast_fp16)[name = string("op_123_cast_fp16")];
            tensor<int32, [3]> var_126_begin_0 = const()[name = string("op_126_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_126_end_mask_0 = const()[name = string("op_126_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_126_cast_fp16 = slice_by_index(begin = var_126_begin_0, end = concat_10, end_mask = var_126_end_mask_0, x = v_cache_1_cast_fp16)[name = string("op_126_cast_fp16")];
            tensor<int32, [4]> concat_12x = const()[name = string("concat_12x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_136_cast_fp16 = reshape(shape = concat_12x, x = linear_0_cast_fp16)[name = string("op_136_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_20_to_fp16)[name = string("q_3_cast_fp16")];
            tensor<int32, [4]> concat_13x = const()[name = string("concat_13x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_143_cast_fp16 = reshape(shape = concat_13x, x = var_123_cast_fp16)[name = string("op_143_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> k_5_cast_fp16 = mul(x = var_143_cast_fp16, y = const_21_to_fp16)[name = string("k_5_cast_fp16")];
            tensor<int32, [4]> concat_14x = const()[name = string("concat_14x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_150_cast_fp16 = reshape(shape = concat_14x, x = var_126_cast_fp16)[name = string("op_150_cast_fp16")];
            tensor<int32, [4]> var_151 = const()[name = string("op_151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_1_transpose_x_0 = const()[name = string("qk_1_transpose_x_0"), val = bool(false)];
            bool qk_1_transpose_y_0 = const()[name = string("qk_1_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_34_perm_0 = const()[name = string("transpose_34_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, ?]> transpose_34 = transpose(perm = transpose_34_perm_0, x = k_5_cast_fp16)[name = string("transpose_78")];
            tensor<fp16, [1, 6, ?, 64]> transpose_33 = transpose(perm = transpose_33_perm_0, x = q_3_cast_fp16)[name = string("transpose_79")];
            tensor<fp16, [1, 6, ?, ?]> qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_33, y = transpose_34)[name = string("qk_1_cast_fp16")];
            int32 concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = int32(448)];
            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_2_cast_uint16_to_int32, concat_15_values1_0))[name = string("concat_15")];
            tensor<int32, [2]> var_154_begin_0 = const()[name = string("op_154_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_154_end_mask_0 = const()[name = string("op_154_end_mask_0"), val = tensor<bool, [2]>([false, true])];
            tensor<fp16, [448, 448]> mask_to_fp16 = const()[name = string("mask_to_fp16"), val = tensor<fp16, [448, 448]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41065664)))];
            tensor<fp16, [?, 448]> var_154_cast_fp16 = slice_by_index(begin = var_154_begin_0, end = concat_15, end_mask = var_154_end_mask_0, x = mask_to_fp16)[name = string("op_154_cast_fp16")];
            int32 concat_16_values0_0 = const()[name = string("concat_16_values0_0"), val = int32(0)];
            int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)];
            bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (concat_16_values0_0, gather_2_cast_uint16_to_int32))[name = string("concat_16")];
            tensor<int32, [2]> var_155_begin_0 = const()[name = string("op_155_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_155_end_mask_0 = const()[name = string("op_155_end_mask_0"), val = tensor<bool, [2]>([true, false])];
            tensor<fp16, [?, ?]> var_155_cast_fp16 = slice_by_index(begin = var_155_begin_0, end = concat_16, end_mask = var_155_end_mask_0, x = var_154_cast_fp16)[name = string("op_155_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> qk_3_cast_fp16 = add(x = qk_1_cast_fp16, y = var_155_cast_fp16)[name = string("qk_3_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> var_158_cast_fp16 = softmax(axis = var_67, x = qk_3_cast_fp16)[name = string("op_158_cast_fp16")];
            bool var_160_transpose_x_0 = const()[name = string("op_160_transpose_x_0"), val = bool(false)];
            bool var_160_transpose_y_0 = const()[name = string("op_160_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, ?, 64]> v_5_cast_fp16 = transpose(perm = var_151, x = var_150_cast_fp16)[name = string("transpose_80")];
            tensor<fp16, [1, 6, ?, 64]> var_160_cast_fp16 = matmul(transpose_x = var_160_transpose_x_0, transpose_y = var_160_transpose_y_0, x = var_158_cast_fp16, y = v_5_cast_fp16)[name = string("op_160_cast_fp16")];
            tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_17x = const()[name = string("concat_17x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_162_cast_fp16 = transpose(perm = var_161, x = var_160_cast_fp16)[name = string("transpose_77")];
            tensor<fp16, [1, ?, 384]> x_7_cast_fp16 = reshape(shape = concat_17x, x = var_162_cast_fp16)[name = string("x_7_cast_fp16")];
            tensor<fp16, [384, 384]> var_166_to_fp16 = const()[name = string("op_166_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41467136)))];
            tensor<fp16, [384]> var_167_to_fp16 = const()[name = string("op_167_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762112)))];
            tensor<fp16, [1, ?, 384]> linear_3_cast_fp16 = linear(bias = var_167_to_fp16, weight = var_166_to_fp16, x = x_7_cast_fp16)[name = string("linear_3_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_9_cast_fp16 = add(x = x_3_cast_fp16, y = linear_3_cast_fp16)[name = string("x_9_cast_fp16")];
            tensor<int32, [1]> var_174_axes_0 = const()[name = string("op_174_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_0_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41762944)))];
            tensor<fp16, [384]> blocks_0_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_0_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41763776)))];
            tensor<fp16, [1, ?, 384]> var_174_cast_fp16 = layer_norm(axes = var_174_axes_0, beta = blocks_0_cross_attn_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_cross_attn_ln_weight_to_fp16, x = x_9_cast_fp16)[name = string("op_174_cast_fp16")];
            tensor<fp16, [384, 384]> var_183_to_fp16 = const()[name = string("op_183_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41764608)))];
            tensor<fp16, [384]> var_184_to_fp16 = const()[name = string("op_184_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42059584)))];
            tensor<fp16, [1, ?, 384]> linear_4_cast_fp16 = linear(bias = var_184_to_fp16, weight = var_183_to_fp16, x = var_174_cast_fp16)[name = string("linear_4_cast_fp16")];
            tensor<int32, [3]> concat_18 = const()[name = string("concat_18"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_19 = const()[name = string("concat_19"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> k_7_internal_tensor_assign_1_stride_0 = const()[name = string("k_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> k_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> k_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> k_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_7_to_fp16 = const()[name = string("k_7_to_fp16"), val = tensor<fp16, [1, 1500, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42060416)))];
            tensor<fp16, [1, 1500, 384]> k_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_18, begin_mask = k_7_internal_tensor_assign_1_begin_mask_0, end = concat_19, end_mask = k_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_7_internal_tensor_assign_1_squeeze_mask_0, stride = k_7_internal_tensor_assign_1_stride_0, update = k_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("k_7_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [3]> concat_20 = const()[name = string("concat_20"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_21 = const()[name = string("concat_21"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> v_7_internal_tensor_assign_1_stride_0 = const()[name = string("v_7_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> v_7_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> v_7_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> v_7_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_7_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_7_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_20, begin_mask = v_7_internal_tensor_assign_1_begin_mask_0, end = concat_21, end_mask = v_7_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_7_internal_tensor_assign_1_squeeze_mask_0, stride = v_7_internal_tensor_assign_1_stride_0, update = v_cache_3_cast_fp16, x = k_7_to_fp16)[name = string("v_7_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [4]> concat_22x = const()[name = string("concat_22x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_204_cast_fp16 = reshape(shape = concat_22x, x = linear_4_cast_fp16)[name = string("op_204_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_22_to_fp16 = const()[name = string("const_22_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_7_cast_fp16 = mul(x = var_204_cast_fp16, y = const_22_to_fp16)[name = string("q_7_cast_fp16")];
            tensor<int32, [4]> var_210 = const()[name = string("op_210"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_211_cast_fp16 = reshape(shape = var_210, x = k_7_internal_tensor_assign_1_cast_fp16)[name = string("op_211_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_23_to_fp16 = const()[name = string("const_23_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, 1500, 6, 64]> k_9_cast_fp16 = mul(x = var_211_cast_fp16, y = const_23_to_fp16)[name = string("k_9_cast_fp16")];
            tensor<int32, [4]> var_217 = const()[name = string("op_217"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_218_cast_fp16 = reshape(shape = var_217, x = v_7_internal_tensor_assign_1_cast_fp16)[name = string("op_218_cast_fp16")];
            tensor<int32, [4]> var_219 = const()[name = string("op_219"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_5_transpose_x_0 = const()[name = string("qk_5_transpose_x_0"), val = bool(false)];
            bool qk_5_transpose_y_0 = const()[name = string("qk_5_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_35_perm_0 = const()[name = string("transpose_35_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_36_perm_0 = const()[name = string("transpose_36_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, 1500]> transpose_36 = transpose(perm = transpose_36_perm_0, x = k_9_cast_fp16)[name = string("transpose_74")];
            tensor<fp16, [1, 6, ?, 64]> transpose_35 = transpose(perm = transpose_35_perm_0, x = q_7_cast_fp16)[name = string("transpose_75")];
            tensor<fp16, [1, 6, ?, 1500]> qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_35, y = transpose_36)[name = string("qk_5_cast_fp16")];
            tensor<fp16, [1, 6, ?, 1500]> var_223_cast_fp16 = softmax(axis = var_67, x = qk_5_cast_fp16)[name = string("op_223_cast_fp16")];
            bool var_225_transpose_x_0 = const()[name = string("op_225_transpose_x_0"), val = bool(false)];
            bool var_225_transpose_y_0 = const()[name = string("op_225_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, 1500, 64]> v_9_cast_fp16 = transpose(perm = var_219, x = var_218_cast_fp16)[name = string("transpose_76")];
            tensor<fp16, [1, 6, ?, 64]> var_225_cast_fp16 = matmul(transpose_x = var_225_transpose_x_0, transpose_y = var_225_transpose_y_0, x = var_223_cast_fp16, y = v_9_cast_fp16)[name = string("op_225_cast_fp16")];
            tensor<int32, [4]> var_226 = const()[name = string("op_226"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_23x = const()[name = string("concat_23x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_227_cast_fp16 = transpose(perm = var_226, x = var_225_cast_fp16)[name = string("transpose_73")];
            tensor<fp16, [1, ?, 384]> x_13_cast_fp16 = reshape(shape = concat_23x, x = var_227_cast_fp16)[name = string("x_13_cast_fp16")];
            tensor<fp16, [384, 384]> var_231_to_fp16 = const()[name = string("op_231_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43212480)))];
            tensor<fp16, [384]> var_232_to_fp16 = const()[name = string("op_232_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43507456)))];
            tensor<fp16, [1, ?, 384]> linear_5_cast_fp16 = linear(bias = var_232_to_fp16, weight = var_231_to_fp16, x = x_13_cast_fp16)[name = string("linear_5_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_15_cast_fp16 = add(x = x_9_cast_fp16, y = linear_5_cast_fp16)[name = string("x_15_cast_fp16")];
            tensor<int32, [1]> var_239_axes_0 = const()[name = string("op_239_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_0_mlp_ln_weight_to_fp16 = const()[name = string("blocks_0_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43508288)))];
            tensor<fp16, [384]> blocks_0_mlp_ln_bias_to_fp16 = const()[name = string("blocks_0_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509120)))];
            tensor<fp16, [1, ?, 384]> var_239_cast_fp16 = layer_norm(axes = var_239_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_73_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_15_cast_fp16)[name = string("op_239_cast_fp16")];
            tensor<fp16, [1536, 384]> var_248_to_fp16 = const()[name = string("op_248_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43509952)))];
            tensor<fp16, [1536]> var_249_to_fp16 = const()[name = string("op_249_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44689664)))];
            tensor<fp16, [1, ?, 1536]> linear_6_cast_fp16 = linear(bias = var_249_to_fp16, weight = var_248_to_fp16, x = var_239_cast_fp16)[name = string("linear_6_cast_fp16")];
            string x_19_mode_0 = const()[name = string("x_19_mode_0"), val = string("EXACT")];
            tensor<fp16, [1, ?, 1536]> x_19_cast_fp16 = gelu(mode = x_19_mode_0, x = linear_6_cast_fp16)[name = string("x_19_cast_fp16")];
            tensor<fp16, [384, 1536]> var_254_to_fp16 = const()[name = string("op_254_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44692800)))];
            tensor<fp16, [384]> var_255_to_fp16 = const()[name = string("op_255_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45872512)))];
            tensor<fp16, [1, ?, 384]> linear_7_cast_fp16 = linear(bias = var_255_to_fp16, weight = var_254_to_fp16, x = x_19_cast_fp16)[name = string("linear_7_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_21_cast_fp16 = add(x = x_15_cast_fp16, y = linear_7_cast_fp16)[name = string("x_21_cast_fp16")];
            tensor<int32, [4]> k_cache_5_begin_0 = const()[name = string("k_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
            tensor<int32, [4]> k_cache_5_end_0 = const()[name = string("k_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 384])];
            tensor<bool, [4]> k_cache_5_end_mask_0 = const()[name = string("k_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_5_squeeze_mask_0 = const()[name = string("k_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> k_cache_5_cast_fp16 = slice_by_index(begin = k_cache_5_begin_0, end = k_cache_5_end_0, end_mask = k_cache_5_end_mask_0, squeeze_mask = k_cache_5_squeeze_mask_0, x = coreml_update_state_8)[name = string("k_cache_5_cast_fp16")];
            tensor<int32, [4]> v_cache_5_begin_0 = const()[name = string("v_cache_5_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
            tensor<int32, [4]> v_cache_5_end_0 = const()[name = string("v_cache_5_end_0"), val = tensor<int32, [4]>([2, 1, 448, 384])];
            tensor<bool, [4]> v_cache_5_end_mask_0 = const()[name = string("v_cache_5_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_5_squeeze_mask_0 = const()[name = string("v_cache_5_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> v_cache_5_cast_fp16 = slice_by_index(begin = v_cache_5_begin_0, end = v_cache_5_end_0, end_mask = v_cache_5_end_mask_0, squeeze_mask = v_cache_5_squeeze_mask_0, x = coreml_update_state_9)[name = string("v_cache_5_cast_fp16")];
            tensor<int32, [4]> k_cache_7_begin_0 = const()[name = string("k_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
            tensor<int32, [4]> k_cache_7_end_0 = const()[name = string("k_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 384])];
            tensor<bool, [4]> k_cache_7_end_mask_0 = const()[name = string("k_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_7_squeeze_mask_0 = const()[name = string("k_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_cache_7_cast_fp16 = slice_by_index(begin = k_cache_7_begin_0, end = k_cache_7_end_0, end_mask = k_cache_7_end_mask_0, squeeze_mask = k_cache_7_squeeze_mask_0, x = read_state_2)[name = string("k_cache_7_cast_fp16")];
            tensor<int32, [4]> v_cache_7_begin_0 = const()[name = string("v_cache_7_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
            tensor<int32, [4]> v_cache_7_end_0 = const()[name = string("v_cache_7_end_0"), val = tensor<int32, [4]>([2, 1, 1500, 384])];
            tensor<bool, [4]> v_cache_7_end_mask_0 = const()[name = string("v_cache_7_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_7_squeeze_mask_0 = const()[name = string("v_cache_7_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_cache_7_cast_fp16 = slice_by_index(begin = v_cache_7_begin_0, end = v_cache_7_end_0, end_mask = v_cache_7_end_mask_0, squeeze_mask = v_cache_7_squeeze_mask_0, x = read_state_3)[name = string("v_cache_7_cast_fp16")];
            int32 var_277 = const()[name = string("op_277"), val = int32(-1)];
            tensor<int32, [1]> var_295_axes_0 = const()[name = string("op_295_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_1_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45873344)))];
            tensor<fp16, [384]> blocks_1_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45874176)))];
            fp16 var_283_to_fp16 = const()[name = string("op_283_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, ?, 384]> var_295_cast_fp16 = layer_norm(axes = var_295_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_21_cast_fp16)[name = string("op_295_cast_fp16")];
            tensor<fp16, [384, 384]> var_306_to_fp16 = const()[name = string("op_306_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45875008)))];
            tensor<fp16, [384]> var_307_to_fp16 = const()[name = string("op_307_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46169984)))];
            tensor<fp16, [1, ?, 384]> linear_8_cast_fp16 = linear(bias = var_307_to_fp16, weight = var_306_to_fp16, x = var_295_cast_fp16)[name = string("linear_8_cast_fp16")];
            tensor<fp16, [384, 384]> var_310_to_fp16 = const()[name = string("op_310_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46170816)))];
            tensor<fp16, [1, ?, 384]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_310_to_fp16, x = var_295_cast_fp16)[name = string("linear_9_cast_fp16")];
            tensor<fp16, [384, 384]> var_314_to_fp16 = const()[name = string("op_314_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46465792)))];
            tensor<fp16, [384]> var_315_to_fp16 = const()[name = string("op_315_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46760768)))];
            tensor<fp16, [1, ?, 384]> linear_10_cast_fp16 = linear(bias = var_315_to_fp16, weight = var_314_to_fp16, x = var_295_cast_fp16)[name = string("linear_10_cast_fp16")];
            tensor<int32, [3]> var_317_shape_cast_fp16 = shape(x = linear_8_cast_fp16)[name = string("op_317_shape_cast_fp16")];
            int32 gather_14_axis_0 = const()[name = string("gather_14_axis_0"), val = int32(0)];
            int32 gather_14_batch_dims_0 = const()[name = string("gather_14_batch_dims_0"), val = int32(0)];
            bool gather_14_validate_indices_0 = const()[name = string("gather_14_validate_indices_0"), val = bool(false)];
            string var_317_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_317_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
            uint16 select_14_to_uint16 = const()[name = string("select_14_to_uint16"), val = uint16(1)];
            tensor<uint16, [3]> var_317_shape_cast_fp16_to_uint16 = cast(dtype = var_317_shape_cast_fp16_to_uint16_dtype_0, x = var_317_shape_cast_fp16)[name = string("cast_52")];
            uint16 gather_14_cast_uint16 = gather(axis = gather_14_axis_0, batch_dims = gather_14_batch_dims_0, indices = select_14_to_uint16, validate_indices = gather_14_validate_indices_0, x = var_317_shape_cast_fp16_to_uint16)[name = string("gather_14_cast_uint16")];
            string gather_14_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_14_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            int32 gather_14_cast_uint16_to_int32 = cast(dtype = gather_14_cast_uint16_to_int32_dtype_0, x = gather_14_cast_uint16)[name = string("cast_51")];
            int32 end_step_5 = add(x = offset, y = gather_14_cast_uint16_to_int32)[name = string("end_step_5")];
            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_19_axes_0 = const()[name = string("expand_dims_19_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_19 = expand_dims(axes = expand_dims_19_axes_0, x = end_step_5)[name = string("expand_dims_19")];
            tensor<int32, [1]> concat_26_values0_0 = const()[name = string("concat_26_values0_0"), val = tensor<int32, [1]>([1])];
            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (concat_26_values0_0, expand_dims_16, expand_dims_1, expand_dims_18))[name = string("concat_26")];
            tensor<int32, [1]> concat_27_values0_0 = const()[name = string("concat_27_values0_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (concat_27_values0_0, concat_27_values1_0, expand_dims_19, concat_27_values3_0))[name = string("concat_27")];
            tensor<int32, [4]> k_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = k_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = k_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_2_stride_0, update = linear_9_cast_fp16, x = coreml_update_state_8)[name = string("k_cache1_internal_tensor_assign_2_cast_fp16")];
            write_state(data = k_cache1_internal_tensor_assign_2_cast_fp16, input = k_cache1)[name = string("coreml_update_state_10_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_10 = read_state(input = k_cache1)[name = string("coreml_update_state_10")];
            tensor<int32, [4]> v_cache1_internal_tensor_assign_2_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_26, begin_mask = v_cache1_internal_tensor_assign_2_begin_mask_0, end = concat_27, end_mask = v_cache1_internal_tensor_assign_2_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_2_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_2_stride_0, update = linear_10_cast_fp16, x = coreml_update_state_9)[name = string("v_cache1_internal_tensor_assign_2_cast_fp16")];
            write_state(data = v_cache1_internal_tensor_assign_2_cast_fp16, input = v_cache1)[name = string("coreml_update_state_11_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_11 = read_state(input = v_cache1)[name = string("coreml_update_state_11")];
            int32 concat_32_values0_0 = const()[name = string("concat_32_values0_0"), val = int32(1)];
            int32 concat_32_values2_0 = const()[name = string("concat_32_values2_0"), val = int32(384)];
            int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)];
            bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)];
            tensor<int32, [3]> concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (concat_32_values0_0, end_step_5, concat_32_values2_0))[name = string("concat_32")];
            tensor<int32, [3]> var_333_begin_0 = const()[name = string("op_333_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_333_end_mask_0 = const()[name = string("op_333_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = concat_32, end_mask = var_333_end_mask_0, x = k_cache_5_cast_fp16)[name = string("op_333_cast_fp16")];
            tensor<int32, [3]> var_336_begin_0 = const()[name = string("op_336_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_336_end_mask_0 = const()[name = string("op_336_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = concat_32, end_mask = var_336_end_mask_0, x = v_cache_5_cast_fp16)[name = string("op_336_cast_fp16")];
            tensor<int32, [4]> concat_34x = const()[name = string("concat_34x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_346_cast_fp16 = reshape(shape = concat_34x, x = linear_8_cast_fp16)[name = string("op_346_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_24_to_fp16 = const()[name = string("const_24_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_11_cast_fp16 = mul(x = var_346_cast_fp16, y = const_24_to_fp16)[name = string("q_11_cast_fp16")];
            tensor<int32, [4]> concat_35x = const()[name = string("concat_35x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_353_cast_fp16 = reshape(shape = concat_35x, x = var_333_cast_fp16)[name = string("op_353_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_25_to_fp16 = const()[name = string("const_25_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> k_15_cast_fp16 = mul(x = var_353_cast_fp16, y = const_25_to_fp16)[name = string("k_15_cast_fp16")];
            tensor<int32, [4]> concat_36x = const()[name = string("concat_36x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_360_cast_fp16 = reshape(shape = concat_36x, x = var_336_cast_fp16)[name = string("op_360_cast_fp16")];
            tensor<int32, [4]> var_361 = const()[name = string("op_361"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_7_transpose_x_0 = const()[name = string("qk_7_transpose_x_0"), val = bool(false)];
            bool qk_7_transpose_y_0 = const()[name = string("qk_7_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_38_perm_0 = const()[name = string("transpose_38_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, ?]> transpose_38 = transpose(perm = transpose_38_perm_0, x = k_15_cast_fp16)[name = string("transpose_70")];
            tensor<fp16, [1, 6, ?, 64]> transpose_37 = transpose(perm = transpose_37_perm_0, x = q_11_cast_fp16)[name = string("transpose_71")];
            tensor<fp16, [1, 6, ?, ?]> qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_37, y = transpose_38)[name = string("qk_7_cast_fp16")];
            int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(448)];
            int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)];
            bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_14_cast_uint16_to_int32, concat_37_values1_0))[name = string("concat_37")];
            tensor<int32, [2]> var_364_begin_0 = const()[name = string("op_364_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_364_end_mask_0 = const()[name = string("op_364_end_mask_0"), val = tensor<bool, [2]>([false, true])];
            tensor<fp16, [?, 448]> var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = concat_37, end_mask = var_364_end_mask_0, x = mask_to_fp16)[name = string("op_364_cast_fp16")];
            int32 concat_38_values0_0 = const()[name = string("concat_38_values0_0"), val = int32(0)];
            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (concat_38_values0_0, gather_14_cast_uint16_to_int32))[name = string("concat_38")];
            tensor<int32, [2]> var_365_begin_0 = const()[name = string("op_365_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_365_end_mask_0 = const()[name = string("op_365_end_mask_0"), val = tensor<bool, [2]>([true, false])];
            tensor<fp16, [?, ?]> var_365_cast_fp16 = slice_by_index(begin = var_365_begin_0, end = concat_38, end_mask = var_365_end_mask_0, x = var_364_cast_fp16)[name = string("op_365_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> qk_9_cast_fp16 = add(x = qk_7_cast_fp16, y = var_365_cast_fp16)[name = string("qk_9_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> var_368_cast_fp16 = softmax(axis = var_277, x = qk_9_cast_fp16)[name = string("op_368_cast_fp16")];
            bool var_370_transpose_x_0 = const()[name = string("op_370_transpose_x_0"), val = bool(false)];
            bool var_370_transpose_y_0 = const()[name = string("op_370_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, ?, 64]> v_15_cast_fp16 = transpose(perm = var_361, x = var_360_cast_fp16)[name = string("transpose_72")];
            tensor<fp16, [1, 6, ?, 64]> var_370_cast_fp16 = matmul(transpose_x = var_370_transpose_x_0, transpose_y = var_370_transpose_y_0, x = var_368_cast_fp16, y = v_15_cast_fp16)[name = string("op_370_cast_fp16")];
            tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_39x = const()[name = string("concat_39x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_372_cast_fp16 = transpose(perm = var_371, x = var_370_cast_fp16)[name = string("transpose_69")];
            tensor<fp16, [1, ?, 384]> x_25_cast_fp16 = reshape(shape = concat_39x, x = var_372_cast_fp16)[name = string("x_25_cast_fp16")];
            tensor<fp16, [384, 384]> var_376_to_fp16 = const()[name = string("op_376_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46761600)))];
            tensor<fp16, [384]> var_377_to_fp16 = const()[name = string("op_377_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47056576)))];
            tensor<fp16, [1, ?, 384]> linear_11_cast_fp16 = linear(bias = var_377_to_fp16, weight = var_376_to_fp16, x = x_25_cast_fp16)[name = string("linear_11_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_27_cast_fp16 = add(x = x_21_cast_fp16, y = linear_11_cast_fp16)[name = string("x_27_cast_fp16")];
            tensor<int32, [1]> var_384_axes_0 = const()[name = string("op_384_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_1_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47057408)))];
            tensor<fp16, [384]> blocks_1_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_1_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47058240)))];
            tensor<fp16, [1, ?, 384]> var_384_cast_fp16 = layer_norm(axes = var_384_axes_0, beta = blocks_1_cross_attn_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_cross_attn_ln_weight_to_fp16, x = x_27_cast_fp16)[name = string("op_384_cast_fp16")];
            tensor<fp16, [384, 384]> var_393_to_fp16 = const()[name = string("op_393_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47059072)))];
            tensor<fp16, [384]> var_394_to_fp16 = const()[name = string("op_394_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354048)))];
            tensor<fp16, [1, ?, 384]> linear_12_cast_fp16 = linear(bias = var_394_to_fp16, weight = var_393_to_fp16, x = var_384_cast_fp16)[name = string("linear_12_cast_fp16")];
            tensor<int32, [3]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_41 = const()[name = string("concat_41"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> k_17_internal_tensor_assign_1_stride_0 = const()[name = string("k_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> k_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> k_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> k_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_40, begin_mask = k_17_internal_tensor_assign_1_begin_mask_0, end = concat_41, end_mask = k_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_17_internal_tensor_assign_1_squeeze_mask_0, stride = k_17_internal_tensor_assign_1_stride_0, update = k_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("k_17_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [3]> concat_42 = const()[name = string("concat_42"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> v_17_internal_tensor_assign_1_stride_0 = const()[name = string("v_17_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> v_17_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> v_17_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> v_17_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_17_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_17_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_42, begin_mask = v_17_internal_tensor_assign_1_begin_mask_0, end = concat_43, end_mask = v_17_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_17_internal_tensor_assign_1_squeeze_mask_0, stride = v_17_internal_tensor_assign_1_stride_0, update = v_cache_7_cast_fp16, x = k_7_to_fp16)[name = string("v_17_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [4]> concat_44x = const()[name = string("concat_44x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_414_cast_fp16 = reshape(shape = concat_44x, x = linear_12_cast_fp16)[name = string("op_414_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_26_to_fp16 = const()[name = string("const_26_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_15_cast_fp16 = mul(x = var_414_cast_fp16, y = const_26_to_fp16)[name = string("q_15_cast_fp16")];
            tensor<int32, [4]> var_420 = const()[name = string("op_420"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_421_cast_fp16 = reshape(shape = var_420, x = k_17_internal_tensor_assign_1_cast_fp16)[name = string("op_421_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_27_to_fp16 = const()[name = string("const_27_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, 1500, 6, 64]> k_19_cast_fp16 = mul(x = var_421_cast_fp16, y = const_27_to_fp16)[name = string("k_19_cast_fp16")];
            tensor<int32, [4]> var_427 = const()[name = string("op_427"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_428_cast_fp16 = reshape(shape = var_427, x = v_17_internal_tensor_assign_1_cast_fp16)[name = string("op_428_cast_fp16")];
            tensor<int32, [4]> var_429 = const()[name = string("op_429"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_11_transpose_x_0 = const()[name = string("qk_11_transpose_x_0"), val = bool(false)];
            bool qk_11_transpose_y_0 = const()[name = string("qk_11_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_39_perm_0 = const()[name = string("transpose_39_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_40_perm_0 = const()[name = string("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, 1500]> transpose_40 = transpose(perm = transpose_40_perm_0, x = k_19_cast_fp16)[name = string("transpose_66")];
            tensor<fp16, [1, 6, ?, 64]> transpose_39 = transpose(perm = transpose_39_perm_0, x = q_15_cast_fp16)[name = string("transpose_67")];
            tensor<fp16, [1, 6, ?, 1500]> qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_39, y = transpose_40)[name = string("qk_11_cast_fp16")];
            tensor<fp16, [1, 6, ?, 1500]> var_433_cast_fp16 = softmax(axis = var_277, x = qk_11_cast_fp16)[name = string("op_433_cast_fp16")];
            bool var_435_transpose_x_0 = const()[name = string("op_435_transpose_x_0"), val = bool(false)];
            bool var_435_transpose_y_0 = const()[name = string("op_435_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, 1500, 64]> v_19_cast_fp16 = transpose(perm = var_429, x = var_428_cast_fp16)[name = string("transpose_68")];
            tensor<fp16, [1, 6, ?, 64]> var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_0, transpose_y = var_435_transpose_y_0, x = var_433_cast_fp16, y = v_19_cast_fp16)[name = string("op_435_cast_fp16")];
            tensor<int32, [4]> var_436 = const()[name = string("op_436"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_45x = const()[name = string("concat_45x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_437_cast_fp16 = transpose(perm = var_436, x = var_435_cast_fp16)[name = string("transpose_65")];
            tensor<fp16, [1, ?, 384]> x_31_cast_fp16 = reshape(shape = concat_45x, x = var_437_cast_fp16)[name = string("x_31_cast_fp16")];
            tensor<fp16, [384, 384]> var_441_to_fp16 = const()[name = string("op_441_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47354880)))];
            tensor<fp16, [384]> var_442_to_fp16 = const()[name = string("op_442_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47649856)))];
            tensor<fp16, [1, ?, 384]> linear_13_cast_fp16 = linear(bias = var_442_to_fp16, weight = var_441_to_fp16, x = x_31_cast_fp16)[name = string("linear_13_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_33_cast_fp16 = add(x = x_27_cast_fp16, y = linear_13_cast_fp16)[name = string("x_33_cast_fp16")];
            tensor<int32, [1]> var_449_axes_0 = const()[name = string("op_449_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_1_mlp_ln_weight_to_fp16 = const()[name = string("blocks_1_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47650688)))];
            tensor<fp16, [384]> blocks_1_mlp_ln_bias_to_fp16 = const()[name = string("blocks_1_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47651520)))];
            tensor<fp16, [1, ?, 384]> var_449_cast_fp16 = layer_norm(axes = var_449_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_283_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_33_cast_fp16)[name = string("op_449_cast_fp16")];
            tensor<fp16, [1536, 384]> var_458_to_fp16 = const()[name = string("op_458_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47652352)))];
            tensor<fp16, [1536]> var_459_to_fp16 = const()[name = string("op_459_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48832064)))];
            tensor<fp16, [1, ?, 1536]> linear_14_cast_fp16 = linear(bias = var_459_to_fp16, weight = var_458_to_fp16, x = var_449_cast_fp16)[name = string("linear_14_cast_fp16")];
            string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("EXACT")];
            tensor<fp16, [1, ?, 1536]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = linear_14_cast_fp16)[name = string("x_37_cast_fp16")];
            tensor<fp16, [384, 1536]> var_464_to_fp16 = const()[name = string("op_464_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48835200)))];
            tensor<fp16, [384]> var_465_to_fp16 = const()[name = string("op_465_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50014912)))];
            tensor<fp16, [1, ?, 384]> linear_15_cast_fp16 = linear(bias = var_465_to_fp16, weight = var_464_to_fp16, x = x_37_cast_fp16)[name = string("linear_15_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_39_cast_fp16 = add(x = x_33_cast_fp16, y = linear_15_cast_fp16)[name = string("x_39_cast_fp16")];
            tensor<int32, [4]> k_cache_9_begin_0 = const()[name = string("k_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
            tensor<int32, [4]> k_cache_9_end_0 = const()[name = string("k_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 384])];
            tensor<bool, [4]> k_cache_9_end_mask_0 = const()[name = string("k_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_9_squeeze_mask_0 = const()[name = string("k_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> k_cache_9_cast_fp16 = slice_by_index(begin = k_cache_9_begin_0, end = k_cache_9_end_0, end_mask = k_cache_9_end_mask_0, squeeze_mask = k_cache_9_squeeze_mask_0, x = coreml_update_state_10)[name = string("k_cache_9_cast_fp16")];
            tensor<int32, [4]> v_cache_9_begin_0 = const()[name = string("v_cache_9_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
            tensor<int32, [4]> v_cache_9_end_0 = const()[name = string("v_cache_9_end_0"), val = tensor<int32, [4]>([3, 1, 448, 384])];
            tensor<bool, [4]> v_cache_9_end_mask_0 = const()[name = string("v_cache_9_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_9_squeeze_mask_0 = const()[name = string("v_cache_9_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> v_cache_9_cast_fp16 = slice_by_index(begin = v_cache_9_begin_0, end = v_cache_9_end_0, end_mask = v_cache_9_end_mask_0, squeeze_mask = v_cache_9_squeeze_mask_0, x = coreml_update_state_11)[name = string("v_cache_9_cast_fp16")];
            tensor<int32, [4]> k_cache_11_begin_0 = const()[name = string("k_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
            tensor<int32, [4]> k_cache_11_end_0 = const()[name = string("k_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 384])];
            tensor<bool, [4]> k_cache_11_end_mask_0 = const()[name = string("k_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_11_squeeze_mask_0 = const()[name = string("k_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_cache_11_cast_fp16 = slice_by_index(begin = k_cache_11_begin_0, end = k_cache_11_end_0, end_mask = k_cache_11_end_mask_0, squeeze_mask = k_cache_11_squeeze_mask_0, x = read_state_2)[name = string("k_cache_11_cast_fp16")];
            tensor<int32, [4]> v_cache_11_begin_0 = const()[name = string("v_cache_11_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
            tensor<int32, [4]> v_cache_11_end_0 = const()[name = string("v_cache_11_end_0"), val = tensor<int32, [4]>([3, 1, 1500, 384])];
            tensor<bool, [4]> v_cache_11_end_mask_0 = const()[name = string("v_cache_11_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_11_squeeze_mask_0 = const()[name = string("v_cache_11_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_cache_11_cast_fp16 = slice_by_index(begin = v_cache_11_begin_0, end = v_cache_11_end_0, end_mask = v_cache_11_end_mask_0, squeeze_mask = v_cache_11_squeeze_mask_0, x = read_state_3)[name = string("v_cache_11_cast_fp16")];
            int32 var_487 = const()[name = string("op_487"), val = int32(-1)];
            tensor<int32, [1]> var_505_axes_0 = const()[name = string("op_505_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_2_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50015744)))];
            tensor<fp16, [384]> blocks_2_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50016576)))];
            fp16 var_493_to_fp16 = const()[name = string("op_493_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, ?, 384]> var_505_cast_fp16 = layer_norm(axes = var_505_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_39_cast_fp16)[name = string("op_505_cast_fp16")];
            tensor<fp16, [384, 384]> var_516_to_fp16 = const()[name = string("op_516_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50017408)))];
            tensor<fp16, [384]> var_517_to_fp16 = const()[name = string("op_517_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50312384)))];
            tensor<fp16, [1, ?, 384]> linear_16_cast_fp16 = linear(bias = var_517_to_fp16, weight = var_516_to_fp16, x = var_505_cast_fp16)[name = string("linear_16_cast_fp16")];
            tensor<fp16, [384, 384]> var_520_to_fp16 = const()[name = string("op_520_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50313216)))];
            tensor<fp16, [1, ?, 384]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_520_to_fp16, x = var_505_cast_fp16)[name = string("linear_17_cast_fp16")];
            tensor<fp16, [384, 384]> var_524_to_fp16 = const()[name = string("op_524_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50608192)))];
            tensor<fp16, [384]> var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50903168)))];
            tensor<fp16, [1, ?, 384]> linear_18_cast_fp16 = linear(bias = var_525_to_fp16, weight = var_524_to_fp16, x = var_505_cast_fp16)[name = string("linear_18_cast_fp16")];
            tensor<int32, [3]> var_527_shape_cast_fp16 = shape(x = linear_16_cast_fp16)[name = string("op_527_shape_cast_fp16")];
            int32 gather_26_axis_0 = const()[name = string("gather_26_axis_0"), val = int32(0)];
            int32 gather_26_batch_dims_0 = const()[name = string("gather_26_batch_dims_0"), val = int32(0)];
            bool gather_26_validate_indices_0 = const()[name = string("gather_26_validate_indices_0"), val = bool(false)];
            string var_527_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_527_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
            uint16 select_26_to_uint16 = const()[name = string("select_26_to_uint16"), val = uint16(1)];
            tensor<uint16, [3]> var_527_shape_cast_fp16_to_uint16 = cast(dtype = var_527_shape_cast_fp16_to_uint16_dtype_0, x = var_527_shape_cast_fp16)[name = string("cast_50")];
            uint16 gather_26_cast_uint16 = gather(axis = gather_26_axis_0, batch_dims = gather_26_batch_dims_0, indices = select_26_to_uint16, validate_indices = gather_26_validate_indices_0, x = var_527_shape_cast_fp16_to_uint16)[name = string("gather_26_cast_uint16")];
            string gather_26_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_26_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            int32 gather_26_cast_uint16_to_int32 = cast(dtype = gather_26_cast_uint16_to_int32_dtype_0, x = gather_26_cast_uint16)[name = string("cast_49")];
            int32 end_step_7 = add(x = offset, y = gather_26_cast_uint16_to_int32)[name = string("end_step_7")];
            tensor<int32, [1]> expand_dims_32 = const()[name = string("expand_dims_32"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_35_axes_0 = const()[name = string("expand_dims_35_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_35 = expand_dims(axes = expand_dims_35_axes_0, x = end_step_7)[name = string("expand_dims_35")];
            tensor<int32, [1]> concat_48_values0_0 = const()[name = string("concat_48_values0_0"), val = tensor<int32, [1]>([2])];
            int32 concat_48_axis_0 = const()[name = string("concat_48_axis_0"), val = int32(0)];
            bool concat_48_interleave_0 = const()[name = string("concat_48_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_48 = concat(axis = concat_48_axis_0, interleave = concat_48_interleave_0, values = (concat_48_values0_0, expand_dims_32, expand_dims_1, expand_dims_34))[name = string("concat_48")];
            tensor<int32, [1]> concat_49_values0_0 = const()[name = string("concat_49_values0_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_49_values1_0 = const()[name = string("concat_49_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_49_values3_0 = const()[name = string("concat_49_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_49_axis_0 = const()[name = string("concat_49_axis_0"), val = int32(0)];
            bool concat_49_interleave_0 = const()[name = string("concat_49_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_49 = concat(axis = concat_49_axis_0, interleave = concat_49_interleave_0, values = (concat_49_values0_0, concat_49_values1_0, expand_dims_35, concat_49_values3_0))[name = string("concat_49")];
            tensor<int32, [4]> k_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = k_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = k_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_3_stride_0, update = linear_17_cast_fp16, x = coreml_update_state_10)[name = string("k_cache1_internal_tensor_assign_3_cast_fp16")];
            write_state(data = k_cache1_internal_tensor_assign_3_cast_fp16, input = k_cache1)[name = string("coreml_update_state_12_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_12 = read_state(input = k_cache1)[name = string("coreml_update_state_12")];
            tensor<int32, [4]> v_cache1_internal_tensor_assign_3_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_48, begin_mask = v_cache1_internal_tensor_assign_3_begin_mask_0, end = concat_49, end_mask = v_cache1_internal_tensor_assign_3_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_3_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_3_stride_0, update = linear_18_cast_fp16, x = coreml_update_state_11)[name = string("v_cache1_internal_tensor_assign_3_cast_fp16")];
            write_state(data = v_cache1_internal_tensor_assign_3_cast_fp16, input = v_cache1)[name = string("coreml_update_state_13_write_state")];
            tensor<fp16, [4, 1, 448, 384]> coreml_update_state_13 = read_state(input = v_cache1)[name = string("coreml_update_state_13")];
            int32 concat_54_values0_0 = const()[name = string("concat_54_values0_0"), val = int32(1)];
            int32 concat_54_values2_0 = const()[name = string("concat_54_values2_0"), val = int32(384)];
            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
            tensor<int32, [3]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (concat_54_values0_0, end_step_7, concat_54_values2_0))[name = string("concat_54")];
            tensor<int32, [3]> var_543_begin_0 = const()[name = string("op_543_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_543_end_mask_0 = const()[name = string("op_543_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_543_cast_fp16 = slice_by_index(begin = var_543_begin_0, end = concat_54, end_mask = var_543_end_mask_0, x = k_cache_9_cast_fp16)[name = string("op_543_cast_fp16")];
            tensor<int32, [3]> var_546_begin_0 = const()[name = string("op_546_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_546_end_mask_0 = const()[name = string("op_546_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_546_cast_fp16 = slice_by_index(begin = var_546_begin_0, end = concat_54, end_mask = var_546_end_mask_0, x = v_cache_9_cast_fp16)[name = string("op_546_cast_fp16")];
            tensor<int32, [4]> concat_56x = const()[name = string("concat_56x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_556_cast_fp16 = reshape(shape = concat_56x, x = linear_16_cast_fp16)[name = string("op_556_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_28_to_fp16 = const()[name = string("const_28_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_19_cast_fp16 = mul(x = var_556_cast_fp16, y = const_28_to_fp16)[name = string("q_19_cast_fp16")];
            tensor<int32, [4]> concat_57x = const()[name = string("concat_57x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_563_cast_fp16 = reshape(shape = concat_57x, x = var_543_cast_fp16)[name = string("op_563_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_29_to_fp16 = const()[name = string("const_29_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> k_25_cast_fp16 = mul(x = var_563_cast_fp16, y = const_29_to_fp16)[name = string("k_25_cast_fp16")];
            tensor<int32, [4]> concat_58x = const()[name = string("concat_58x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_570_cast_fp16 = reshape(shape = concat_58x, x = var_546_cast_fp16)[name = string("op_570_cast_fp16")];
            tensor<int32, [4]> var_571 = const()[name = string("op_571"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_13_transpose_x_0 = const()[name = string("qk_13_transpose_x_0"), val = bool(false)];
            bool qk_13_transpose_y_0 = const()[name = string("qk_13_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_42_perm_0 = const()[name = string("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, ?]> transpose_42 = transpose(perm = transpose_42_perm_0, x = k_25_cast_fp16)[name = string("transpose_62")];
            tensor<fp16, [1, 6, ?, 64]> transpose_41 = transpose(perm = transpose_41_perm_0, x = q_19_cast_fp16)[name = string("transpose_63")];
            tensor<fp16, [1, 6, ?, ?]> qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_41, y = transpose_42)[name = string("qk_13_cast_fp16")];
            int32 concat_59_values1_0 = const()[name = string("concat_59_values1_0"), val = int32(448)];
            int32 concat_59_axis_0 = const()[name = string("concat_59_axis_0"), val = int32(0)];
            bool concat_59_interleave_0 = const()[name = string("concat_59_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_59 = concat(axis = concat_59_axis_0, interleave = concat_59_interleave_0, values = (gather_26_cast_uint16_to_int32, concat_59_values1_0))[name = string("concat_59")];
            tensor<int32, [2]> var_574_begin_0 = const()[name = string("op_574_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_574_end_mask_0 = const()[name = string("op_574_end_mask_0"), val = tensor<bool, [2]>([false, true])];
            tensor<fp16, [?, 448]> var_574_cast_fp16 = slice_by_index(begin = var_574_begin_0, end = concat_59, end_mask = var_574_end_mask_0, x = mask_to_fp16)[name = string("op_574_cast_fp16")];
            int32 concat_60_values0_0 = const()[name = string("concat_60_values0_0"), val = int32(0)];
            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (concat_60_values0_0, gather_26_cast_uint16_to_int32))[name = string("concat_60")];
            tensor<int32, [2]> var_575_begin_0 = const()[name = string("op_575_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_575_end_mask_0 = const()[name = string("op_575_end_mask_0"), val = tensor<bool, [2]>([true, false])];
            tensor<fp16, [?, ?]> var_575_cast_fp16 = slice_by_index(begin = var_575_begin_0, end = concat_60, end_mask = var_575_end_mask_0, x = var_574_cast_fp16)[name = string("op_575_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> qk_15_cast_fp16 = add(x = qk_13_cast_fp16, y = var_575_cast_fp16)[name = string("qk_15_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> var_578_cast_fp16 = softmax(axis = var_487, x = qk_15_cast_fp16)[name = string("op_578_cast_fp16")];
            bool var_580_transpose_x_0 = const()[name = string("op_580_transpose_x_0"), val = bool(false)];
            bool var_580_transpose_y_0 = const()[name = string("op_580_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, ?, 64]> v_25_cast_fp16 = transpose(perm = var_571, x = var_570_cast_fp16)[name = string("transpose_64")];
            tensor<fp16, [1, 6, ?, 64]> var_580_cast_fp16 = matmul(transpose_x = var_580_transpose_x_0, transpose_y = var_580_transpose_y_0, x = var_578_cast_fp16, y = v_25_cast_fp16)[name = string("op_580_cast_fp16")];
            tensor<int32, [4]> var_581 = const()[name = string("op_581"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_61x = const()[name = string("concat_61x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_582_cast_fp16 = transpose(perm = var_581, x = var_580_cast_fp16)[name = string("transpose_61")];
            tensor<fp16, [1, ?, 384]> x_43_cast_fp16 = reshape(shape = concat_61x, x = var_582_cast_fp16)[name = string("x_43_cast_fp16")];
            tensor<fp16, [384, 384]> var_586_to_fp16 = const()[name = string("op_586_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50904000)))];
            tensor<fp16, [384]> var_587_to_fp16 = const()[name = string("op_587_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51198976)))];
            tensor<fp16, [1, ?, 384]> linear_19_cast_fp16 = linear(bias = var_587_to_fp16, weight = var_586_to_fp16, x = x_43_cast_fp16)[name = string("linear_19_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_45_cast_fp16 = add(x = x_39_cast_fp16, y = linear_19_cast_fp16)[name = string("x_45_cast_fp16")];
            tensor<int32, [1]> var_594_axes_0 = const()[name = string("op_594_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_2_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51199808)))];
            tensor<fp16, [384]> blocks_2_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_2_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51200640)))];
            tensor<fp16, [1, ?, 384]> var_594_cast_fp16 = layer_norm(axes = var_594_axes_0, beta = blocks_2_cross_attn_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_cross_attn_ln_weight_to_fp16, x = x_45_cast_fp16)[name = string("op_594_cast_fp16")];
            tensor<fp16, [384, 384]> var_603_to_fp16 = const()[name = string("op_603_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51201472)))];
            tensor<fp16, [384]> var_604_to_fp16 = const()[name = string("op_604_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51496448)))];
            tensor<fp16, [1, ?, 384]> linear_20_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = var_594_cast_fp16)[name = string("linear_20_cast_fp16")];
            tensor<int32, [3]> concat_62 = const()[name = string("concat_62"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_63 = const()[name = string("concat_63"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> k_27_internal_tensor_assign_1_stride_0 = const()[name = string("k_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> k_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> k_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> k_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_62, begin_mask = k_27_internal_tensor_assign_1_begin_mask_0, end = concat_63, end_mask = k_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_27_internal_tensor_assign_1_squeeze_mask_0, stride = k_27_internal_tensor_assign_1_stride_0, update = k_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("k_27_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [3]> concat_64 = const()[name = string("concat_64"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_65 = const()[name = string("concat_65"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> v_27_internal_tensor_assign_1_stride_0 = const()[name = string("v_27_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> v_27_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> v_27_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> v_27_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_27_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_27_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_64, begin_mask = v_27_internal_tensor_assign_1_begin_mask_0, end = concat_65, end_mask = v_27_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_27_internal_tensor_assign_1_squeeze_mask_0, stride = v_27_internal_tensor_assign_1_stride_0, update = v_cache_11_cast_fp16, x = k_7_to_fp16)[name = string("v_27_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [4]> concat_66x = const()[name = string("concat_66x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_624_cast_fp16 = reshape(shape = concat_66x, x = linear_20_cast_fp16)[name = string("op_624_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_23_cast_fp16 = mul(x = var_624_cast_fp16, y = const_30_to_fp16)[name = string("q_23_cast_fp16")];
            tensor<int32, [4]> var_630 = const()[name = string("op_630"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_631_cast_fp16 = reshape(shape = var_630, x = k_27_internal_tensor_assign_1_cast_fp16)[name = string("op_631_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, 1500, 6, 64]> k_29_cast_fp16 = mul(x = var_631_cast_fp16, y = const_31_to_fp16)[name = string("k_29_cast_fp16")];
            tensor<int32, [4]> var_637 = const()[name = string("op_637"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_638_cast_fp16 = reshape(shape = var_637, x = v_27_internal_tensor_assign_1_cast_fp16)[name = string("op_638_cast_fp16")];
            tensor<int32, [4]> var_639 = const()[name = string("op_639"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_17_transpose_x_0 = const()[name = string("qk_17_transpose_x_0"), val = bool(false)];
            bool qk_17_transpose_y_0 = const()[name = string("qk_17_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_43_perm_0 = const()[name = string("transpose_43_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_44_perm_0 = const()[name = string("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, 1500]> transpose_44 = transpose(perm = transpose_44_perm_0, x = k_29_cast_fp16)[name = string("transpose_58")];
            tensor<fp16, [1, 6, ?, 64]> transpose_43 = transpose(perm = transpose_43_perm_0, x = q_23_cast_fp16)[name = string("transpose_59")];
            tensor<fp16, [1, 6, ?, 1500]> qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_43, y = transpose_44)[name = string("qk_17_cast_fp16")];
            tensor<fp16, [1, 6, ?, 1500]> var_643_cast_fp16 = softmax(axis = var_487, x = qk_17_cast_fp16)[name = string("op_643_cast_fp16")];
            bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)];
            bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, 1500, 64]> v_29_cast_fp16 = transpose(perm = var_639, x = var_638_cast_fp16)[name = string("transpose_60")];
            tensor<fp16, [1, 6, ?, 64]> var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = var_643_cast_fp16, y = v_29_cast_fp16)[name = string("op_645_cast_fp16")];
            tensor<int32, [4]> var_646 = const()[name = string("op_646"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_67x = const()[name = string("concat_67x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_647_cast_fp16 = transpose(perm = var_646, x = var_645_cast_fp16)[name = string("transpose_57")];
            tensor<fp16, [1, ?, 384]> x_49_cast_fp16 = reshape(shape = concat_67x, x = var_647_cast_fp16)[name = string("x_49_cast_fp16")];
            tensor<fp16, [384, 384]> var_651_to_fp16 = const()[name = string("op_651_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51497280)))];
            tensor<fp16, [384]> var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51792256)))];
            tensor<fp16, [1, ?, 384]> linear_21_cast_fp16 = linear(bias = var_652_to_fp16, weight = var_651_to_fp16, x = x_49_cast_fp16)[name = string("linear_21_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_21_cast_fp16)[name = string("x_51_cast_fp16")];
            tensor<int32, [1]> var_659_axes_0 = const()[name = string("op_659_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_2_mlp_ln_weight_to_fp16 = const()[name = string("blocks_2_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793088)))];
            tensor<fp16, [384]> blocks_2_mlp_ln_bias_to_fp16 = const()[name = string("blocks_2_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51793920)))];
            tensor<fp16, [1, ?, 384]> var_659_cast_fp16 = layer_norm(axes = var_659_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_493_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_51_cast_fp16)[name = string("op_659_cast_fp16")];
            tensor<fp16, [1536, 384]> var_668_to_fp16 = const()[name = string("op_668_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51794752)))];
            tensor<fp16, [1536]> var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52974464)))];
            tensor<fp16, [1, ?, 1536]> linear_22_cast_fp16 = linear(bias = var_669_to_fp16, weight = var_668_to_fp16, x = var_659_cast_fp16)[name = string("linear_22_cast_fp16")];
            string x_55_mode_0 = const()[name = string("x_55_mode_0"), val = string("EXACT")];
            tensor<fp16, [1, ?, 1536]> x_55_cast_fp16 = gelu(mode = x_55_mode_0, x = linear_22_cast_fp16)[name = string("x_55_cast_fp16")];
            tensor<fp16, [384, 1536]> var_674_to_fp16 = const()[name = string("op_674_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52977600)))];
            tensor<fp16, [384]> var_675_to_fp16 = const()[name = string("op_675_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54157312)))];
            tensor<fp16, [1, ?, 384]> linear_23_cast_fp16 = linear(bias = var_675_to_fp16, weight = var_674_to_fp16, x = x_55_cast_fp16)[name = string("linear_23_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_57_cast_fp16 = add(x = x_51_cast_fp16, y = linear_23_cast_fp16)[name = string("x_57_cast_fp16")];
            tensor<int32, [4]> k_cache_13_begin_0 = const()[name = string("k_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
            tensor<int32, [4]> k_cache_13_end_0 = const()[name = string("k_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 384])];
            tensor<bool, [4]> k_cache_13_end_mask_0 = const()[name = string("k_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_13_squeeze_mask_0 = const()[name = string("k_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> k_cache_13_cast_fp16 = slice_by_index(begin = k_cache_13_begin_0, end = k_cache_13_end_0, end_mask = k_cache_13_end_mask_0, squeeze_mask = k_cache_13_squeeze_mask_0, x = coreml_update_state_12)[name = string("k_cache_13_cast_fp16")];
            tensor<int32, [4]> v_cache_13_begin_0 = const()[name = string("v_cache_13_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
            tensor<int32, [4]> v_cache_13_end_0 = const()[name = string("v_cache_13_end_0"), val = tensor<int32, [4]>([4, 1, 448, 384])];
            tensor<bool, [4]> v_cache_13_end_mask_0 = const()[name = string("v_cache_13_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_13_squeeze_mask_0 = const()[name = string("v_cache_13_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 448, 384]> v_cache_13_cast_fp16 = slice_by_index(begin = v_cache_13_begin_0, end = v_cache_13_end_0, end_mask = v_cache_13_end_mask_0, squeeze_mask = v_cache_13_squeeze_mask_0, x = coreml_update_state_13)[name = string("v_cache_13_cast_fp16")];
            tensor<int32, [4]> k_cache_begin_0 = const()[name = string("k_cache_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
            tensor<int32, [4]> k_cache_end_0 = const()[name = string("k_cache_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 384])];
            tensor<bool, [4]> k_cache_end_mask_0 = const()[name = string("k_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> k_cache_squeeze_mask_0 = const()[name = string("k_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_cache_cast_fp16 = slice_by_index(begin = k_cache_begin_0, end = k_cache_end_0, end_mask = k_cache_end_mask_0, squeeze_mask = k_cache_squeeze_mask_0, x = read_state_2)[name = string("k_cache_cast_fp16")];
            tensor<int32, [4]> v_cache_begin_0 = const()[name = string("v_cache_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
            tensor<int32, [4]> v_cache_end_0 = const()[name = string("v_cache_end_0"), val = tensor<int32, [4]>([4, 1, 1500, 384])];
            tensor<bool, [4]> v_cache_end_mask_0 = const()[name = string("v_cache_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<bool, [4]> v_cache_squeeze_mask_0 = const()[name = string("v_cache_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_cache_cast_fp16 = slice_by_index(begin = v_cache_begin_0, end = v_cache_end_0, end_mask = v_cache_end_mask_0, squeeze_mask = v_cache_squeeze_mask_0, x = read_state_3)[name = string("v_cache_cast_fp16")];
            int32 var_697 = const()[name = string("op_697"), val = int32(-1)];
            tensor<int32, [1]> var_715_axes_0 = const()[name = string("op_715_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_3_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158144)))];
            tensor<fp16, [384]> blocks_3_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54158976)))];
            fp16 var_703_to_fp16 = const()[name = string("op_703_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, ?, 384]> var_715_cast_fp16 = layer_norm(axes = var_715_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_57_cast_fp16)[name = string("op_715_cast_fp16")];
            tensor<fp16, [384, 384]> var_726_to_fp16 = const()[name = string("op_726_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54159808)))];
            tensor<fp16, [384]> var_727_to_fp16 = const()[name = string("op_727_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54454784)))];
            tensor<fp16, [1, ?, 384]> linear_24_cast_fp16 = linear(bias = var_727_to_fp16, weight = var_726_to_fp16, x = var_715_cast_fp16)[name = string("linear_24_cast_fp16")];
            tensor<fp16, [384, 384]> var_730_to_fp16 = const()[name = string("op_730_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54455616)))];
            tensor<fp16, [1, ?, 384]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_730_to_fp16, x = var_715_cast_fp16)[name = string("linear_25_cast_fp16")];
            tensor<fp16, [384, 384]> var_734_to_fp16 = const()[name = string("op_734_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54750592)))];
            tensor<fp16, [384]> var_735_to_fp16 = const()[name = string("op_735_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55045568)))];
            tensor<fp16, [1, ?, 384]> linear_26_cast_fp16 = linear(bias = var_735_to_fp16, weight = var_734_to_fp16, x = var_715_cast_fp16)[name = string("linear_26_cast_fp16")];
            tensor<int32, [3]> var_737_shape_cast_fp16 = shape(x = linear_24_cast_fp16)[name = string("op_737_shape_cast_fp16")];
            int32 gather_38_axis_0 = const()[name = string("gather_38_axis_0"), val = int32(0)];
            int32 gather_38_batch_dims_0 = const()[name = string("gather_38_batch_dims_0"), val = int32(0)];
            bool gather_38_validate_indices_0 = const()[name = string("gather_38_validate_indices_0"), val = bool(false)];
            string var_737_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_737_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")];
            uint16 select_38_to_uint16 = const()[name = string("select_38_to_uint16"), val = uint16(1)];
            tensor<uint16, [3]> var_737_shape_cast_fp16_to_uint16 = cast(dtype = var_737_shape_cast_fp16_to_uint16_dtype_0, x = var_737_shape_cast_fp16)[name = string("cast_48")];
            uint16 gather_38_cast_uint16 = gather(axis = gather_38_axis_0, batch_dims = gather_38_batch_dims_0, indices = select_38_to_uint16, validate_indices = gather_38_validate_indices_0, x = var_737_shape_cast_fp16_to_uint16)[name = string("gather_38_cast_uint16")];
            string gather_38_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_38_cast_uint16_to_int32_dtype_0"), val = string("int32")];
            int32 gather_38_cast_uint16_to_int32 = cast(dtype = gather_38_cast_uint16_to_int32_dtype_0, x = gather_38_cast_uint16)[name = string("cast_47")];
            int32 end_step = add(x = offset, y = gather_38_cast_uint16_to_int32)[name = string("end_step")];
            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_50 = const()[name = string("expand_dims_50"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_51_axes_0 = const()[name = string("expand_dims_51_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_51 = expand_dims(axes = expand_dims_51_axes_0, x = end_step)[name = string("expand_dims_51")];
            tensor<int32, [1]> concat_70_values0_0 = const()[name = string("concat_70_values0_0"), val = tensor<int32, [1]>([3])];
            int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)];
            bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (concat_70_values0_0, expand_dims_48, expand_dims_1, expand_dims_50))[name = string("concat_70")];
            tensor<int32, [1]> concat_71_values0_0 = const()[name = string("concat_71_values0_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_71_values1_0 = const()[name = string("concat_71_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_71_values3_0 = const()[name = string("concat_71_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)];
            bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (concat_71_values0_0, concat_71_values1_0, expand_dims_51, concat_71_values3_0))[name = string("concat_71")];
            tensor<int32, [4]> k_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("k_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> k_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("k_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> k_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = k_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = k_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = k_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = k_cache1_internal_tensor_assign_4_stride_0, update = linear_25_cast_fp16, x = coreml_update_state_12)[name = string("k_cache1_internal_tensor_assign_4_cast_fp16")];
            write_state(data = k_cache1_internal_tensor_assign_4_cast_fp16, input = k_cache1)[name = string("coreml_update_state_14_write_state")];
            tensor<int32, [4]> v_cache1_internal_tensor_assign_4_stride_0 = const()[name = string("v_cache1_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_begin_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_end_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> v_cache1_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("v_cache1_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([true, false, false, false])];
            tensor<fp16, [4, 1, 448, 384]> v_cache1_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_70, begin_mask = v_cache1_internal_tensor_assign_4_begin_mask_0, end = concat_71, end_mask = v_cache1_internal_tensor_assign_4_end_mask_0, squeeze_mask = v_cache1_internal_tensor_assign_4_squeeze_mask_0, stride = v_cache1_internal_tensor_assign_4_stride_0, update = linear_26_cast_fp16, x = coreml_update_state_13)[name = string("v_cache1_internal_tensor_assign_4_cast_fp16")];
            write_state(data = v_cache1_internal_tensor_assign_4_cast_fp16, input = v_cache1)[name = string("coreml_update_state_15_write_state")];
            int32 concat_76_values0_0 = const()[name = string("concat_76_values0_0"), val = int32(1)];
            int32 concat_76_values2_0 = const()[name = string("concat_76_values2_0"), val = int32(384)];
            int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)];
            bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)];
            tensor<int32, [3]> concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (concat_76_values0_0, end_step, concat_76_values2_0))[name = string("concat_76")];
            tensor<int32, [3]> var_753_begin_0 = const()[name = string("op_753_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_753_end_mask_0 = const()[name = string("op_753_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = concat_76, end_mask = var_753_end_mask_0, x = k_cache_13_cast_fp16)[name = string("op_753_cast_fp16")];
            tensor<int32, [3]> var_756_begin_0 = const()[name = string("op_756_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<bool, [3]> var_756_end_mask_0 = const()[name = string("op_756_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<fp16, [1, ?, 384]> var_756_cast_fp16 = slice_by_index(begin = var_756_begin_0, end = concat_76, end_mask = var_756_end_mask_0, x = v_cache_13_cast_fp16)[name = string("op_756_cast_fp16")];
            tensor<int32, [4]> concat_78x = const()[name = string("concat_78x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_766_cast_fp16 = reshape(shape = concat_78x, x = linear_24_cast_fp16)[name = string("op_766_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_32_to_fp16 = const()[name = string("const_32_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_27_cast_fp16 = mul(x = var_766_cast_fp16, y = const_32_to_fp16)[name = string("q_27_cast_fp16")];
            tensor<int32, [4]> concat_79x = const()[name = string("concat_79x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_773_cast_fp16 = reshape(shape = concat_79x, x = var_753_cast_fp16)[name = string("op_773_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> k_35_cast_fp16 = mul(x = var_773_cast_fp16, y = const_33_to_fp16)[name = string("k_35_cast_fp16")];
            tensor<int32, [4]> concat_80x = const()[name = string("concat_80x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_780_cast_fp16 = reshape(shape = concat_80x, x = var_756_cast_fp16)[name = string("op_780_cast_fp16")];
            tensor<int32, [4]> var_781 = const()[name = string("op_781"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_19_transpose_x_0 = const()[name = string("qk_19_transpose_x_0"), val = bool(false)];
            bool qk_19_transpose_y_0 = const()[name = string("qk_19_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_46_perm_0 = const()[name = string("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, ?]> transpose_46 = transpose(perm = transpose_46_perm_0, x = k_35_cast_fp16)[name = string("transpose_54")];
            tensor<fp16, [1, 6, ?, 64]> transpose_45 = transpose(perm = transpose_45_perm_0, x = q_27_cast_fp16)[name = string("transpose_55")];
            tensor<fp16, [1, 6, ?, ?]> qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_45, y = transpose_46)[name = string("qk_19_cast_fp16")];
            int32 concat_81_values1_0 = const()[name = string("concat_81_values1_0"), val = int32(448)];
            int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)];
            bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (gather_38_cast_uint16_to_int32, concat_81_values1_0))[name = string("concat_81")];
            tensor<int32, [2]> var_784_begin_0 = const()[name = string("op_784_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_784_end_mask_0 = const()[name = string("op_784_end_mask_0"), val = tensor<bool, [2]>([false, true])];
            tensor<fp16, [?, 448]> var_784_cast_fp16 = slice_by_index(begin = var_784_begin_0, end = concat_81, end_mask = var_784_end_mask_0, x = mask_to_fp16)[name = string("op_784_cast_fp16")];
            int32 concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = int32(0)];
            int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)];
            bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)];
            tensor<int32, [2]> concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, gather_38_cast_uint16_to_int32))[name = string("concat_82")];
            tensor<int32, [2]> var_785_begin_0 = const()[name = string("op_785_begin_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<bool, [2]> var_785_end_mask_0 = const()[name = string("op_785_end_mask_0"), val = tensor<bool, [2]>([true, false])];
            tensor<fp16, [?, ?]> var_785_cast_fp16 = slice_by_index(begin = var_785_begin_0, end = concat_82, end_mask = var_785_end_mask_0, x = var_784_cast_fp16)[name = string("op_785_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> qk_21_cast_fp16 = add(x = qk_19_cast_fp16, y = var_785_cast_fp16)[name = string("qk_21_cast_fp16")];
            tensor<fp16, [1, 6, ?, ?]> var_788_cast_fp16 = softmax(axis = var_697, x = qk_21_cast_fp16)[name = string("op_788_cast_fp16")];
            bool var_790_transpose_x_0 = const()[name = string("op_790_transpose_x_0"), val = bool(false)];
            bool var_790_transpose_y_0 = const()[name = string("op_790_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, ?, 64]> v_35_cast_fp16 = transpose(perm = var_781, x = var_780_cast_fp16)[name = string("transpose_56")];
            tensor<fp16, [1, 6, ?, 64]> var_790_cast_fp16 = matmul(transpose_x = var_790_transpose_x_0, transpose_y = var_790_transpose_y_0, x = var_788_cast_fp16, y = v_35_cast_fp16)[name = string("op_790_cast_fp16")];
            tensor<int32, [4]> var_791 = const()[name = string("op_791"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_83x = const()[name = string("concat_83x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_792_cast_fp16 = transpose(perm = var_791, x = var_790_cast_fp16)[name = string("transpose_53")];
            tensor<fp16, [1, ?, 384]> x_61_cast_fp16 = reshape(shape = concat_83x, x = var_792_cast_fp16)[name = string("x_61_cast_fp16")];
            tensor<fp16, [384, 384]> var_796_to_fp16 = const()[name = string("op_796_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55046400)))];
            tensor<fp16, [384]> var_797_to_fp16 = const()[name = string("op_797_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55341376)))];
            tensor<fp16, [1, ?, 384]> linear_27_cast_fp16 = linear(bias = var_797_to_fp16, weight = var_796_to_fp16, x = x_61_cast_fp16)[name = string("linear_27_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_63_cast_fp16 = add(x = x_57_cast_fp16, y = linear_27_cast_fp16)[name = string("x_63_cast_fp16")];
            tensor<int32, [1]> var_804_axes_0 = const()[name = string("op_804_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_3_cross_attn_ln_weight_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55342208)))];
            tensor<fp16, [384]> blocks_3_cross_attn_ln_bias_to_fp16 = const()[name = string("blocks_3_cross_attn_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343040)))];
            tensor<fp16, [1, ?, 384]> var_804_cast_fp16 = layer_norm(axes = var_804_axes_0, beta = blocks_3_cross_attn_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_cross_attn_ln_weight_to_fp16, x = x_63_cast_fp16)[name = string("op_804_cast_fp16")];
            tensor<fp16, [384, 384]> var_813_to_fp16 = const()[name = string("op_813_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55343872)))];
            tensor<fp16, [384]> var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55638848)))];
            tensor<fp16, [1, ?, 384]> linear_28_cast_fp16 = linear(bias = var_814_to_fp16, weight = var_813_to_fp16, x = var_804_cast_fp16)[name = string("linear_28_cast_fp16")];
            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> k_37_internal_tensor_assign_1_stride_0 = const()[name = string("k_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> k_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> k_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> k_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("k_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> k_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_84, begin_mask = k_37_internal_tensor_assign_1_begin_mask_0, end = concat_85, end_mask = k_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = k_37_internal_tensor_assign_1_squeeze_mask_0, stride = k_37_internal_tensor_assign_1_stride_0, update = k_cache_cast_fp16, x = k_7_to_fp16)[name = string("k_37_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [3]> concat_86 = const()[name = string("concat_86"), val = tensor<int32, [3]>([0, 0, 0])];
            tensor<int32, [3]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [3]>([0, 1500, 0])];
            tensor<int32, [3]> v_37_internal_tensor_assign_1_stride_0 = const()[name = string("v_37_internal_tensor_assign_1_stride_0"), val = tensor<int32, [3]>([1, 1, 1])];
            tensor<bool, [3]> v_37_internal_tensor_assign_1_begin_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<bool, [3]> v_37_internal_tensor_assign_1_end_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [3]>([true, false, true])];
            tensor<bool, [3]> v_37_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("v_37_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, false])];
            tensor<fp16, [1, 1500, 384]> v_37_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_86, begin_mask = v_37_internal_tensor_assign_1_begin_mask_0, end = concat_87, end_mask = v_37_internal_tensor_assign_1_end_mask_0, squeeze_mask = v_37_internal_tensor_assign_1_squeeze_mask_0, stride = v_37_internal_tensor_assign_1_stride_0, update = v_cache_cast_fp16, x = k_7_to_fp16)[name = string("v_37_internal_tensor_assign_1_cast_fp16")];
            tensor<int32, [4]> concat_88x = const()[name = string("concat_88x"), val = tensor<int32, [4]>([1, -1, 6, 64])];
            tensor<fp16, [1, ?, 6, 64]> var_834_cast_fp16 = reshape(shape = concat_88x, x = linear_28_cast_fp16)[name = string("op_834_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, ?, 6, 64]> q_cast_fp16 = mul(x = var_834_cast_fp16, y = const_34_to_fp16)[name = string("q_cast_fp16")];
            tensor<int32, [4]> var_840 = const()[name = string("op_840"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_841_cast_fp16 = reshape(shape = var_840, x = k_37_internal_tensor_assign_1_cast_fp16)[name = string("op_841_cast_fp16")];
            tensor<fp16, [1, 1, 1, 1]> const_35_to_fp16 = const()[name = string("const_35_to_fp16"), val = tensor<fp16, [1, 1, 1, 1]>([[[[0x1.6ap-2]]]])];
            tensor<fp16, [1, 1500, 6, 64]> k_cast_fp16 = mul(x = var_841_cast_fp16, y = const_35_to_fp16)[name = string("k_cast_fp16")];
            tensor<int32, [4]> var_847 = const()[name = string("op_847"), val = tensor<int32, [4]>([1, 1500, 6, -1])];
            tensor<fp16, [1, 1500, 6, 64]> var_848_cast_fp16 = reshape(shape = var_847, x = v_37_internal_tensor_assign_1_cast_fp16)[name = string("op_848_cast_fp16")];
            tensor<int32, [4]> var_849 = const()[name = string("op_849"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            bool qk_transpose_x_0 = const()[name = string("qk_transpose_x_0"), val = bool(false)];
            bool qk_transpose_y_0 = const()[name = string("qk_transpose_y_0"), val = bool(false)];
            tensor<int32, [4]> transpose_47_perm_0 = const()[name = string("transpose_47_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_48_perm_0 = const()[name = string("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 6, 64, 1500]> transpose_48 = transpose(perm = transpose_48_perm_0, x = k_cast_fp16)[name = string("transpose_50")];
            tensor<fp16, [1, 6, ?, 64]> transpose_47 = transpose(perm = transpose_47_perm_0, x = q_cast_fp16)[name = string("transpose_51")];
            tensor<fp16, [1, 6, ?, 1500]> qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_47, y = transpose_48)[name = string("qk_cast_fp16")];
            tensor<fp16, [1, 6, ?, 1500]> var_853_cast_fp16 = softmax(axis = var_697, x = qk_cast_fp16)[name = string("op_853_cast_fp16")];
            bool var_855_transpose_x_0 = const()[name = string("op_855_transpose_x_0"), val = bool(false)];
            bool var_855_transpose_y_0 = const()[name = string("op_855_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 6, 1500, 64]> v_cast_fp16 = transpose(perm = var_849, x = var_848_cast_fp16)[name = string("transpose_52")];
            tensor<fp16, [1, 6, ?, 64]> var_855_cast_fp16 = matmul(transpose_x = var_855_transpose_x_0, transpose_y = var_855_transpose_y_0, x = var_853_cast_fp16, y = v_cast_fp16)[name = string("op_855_cast_fp16")];
            tensor<int32, [4]> var_856 = const()[name = string("op_856"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> concat_89x = const()[name = string("concat_89x"), val = tensor<int32, [3]>([1, -1, 384])];
            tensor<fp16, [1, ?, 6, 64]> var_857_cast_fp16 = transpose(perm = var_856, x = var_855_cast_fp16)[name = string("transpose_49")];
            tensor<fp16, [1, ?, 384]> x_67_cast_fp16 = reshape(shape = concat_89x, x = var_857_cast_fp16)[name = string("x_67_cast_fp16")];
            tensor<fp16, [384, 384]> var_861_to_fp16 = const()[name = string("op_861_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55639680)))];
            tensor<fp16, [384]> var_862_to_fp16 = const()[name = string("op_862_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55934656)))];
            tensor<fp16, [1, ?, 384]> linear_29_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = x_67_cast_fp16)[name = string("linear_29_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_69_cast_fp16 = add(x = x_63_cast_fp16, y = linear_29_cast_fp16)[name = string("x_69_cast_fp16")];
            tensor<int32, [1]> var_869_axes_0 = const()[name = string("op_869_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> blocks_3_mlp_ln_weight_to_fp16 = const()[name = string("blocks_3_mlp_ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55935488)))];
            tensor<fp16, [384]> blocks_3_mlp_ln_bias_to_fp16 = const()[name = string("blocks_3_mlp_ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55936320)))];
            tensor<fp16, [1, ?, 384]> var_869_cast_fp16 = layer_norm(axes = var_869_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_703_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_69_cast_fp16)[name = string("op_869_cast_fp16")];
            tensor<fp16, [1536, 384]> var_878_to_fp16 = const()[name = string("op_878_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55937152)))];
            tensor<fp16, [1536]> var_879_to_fp16 = const()[name = string("op_879_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57116864)))];
            tensor<fp16, [1, ?, 1536]> linear_30_cast_fp16 = linear(bias = var_879_to_fp16, weight = var_878_to_fp16, x = var_869_cast_fp16)[name = string("linear_30_cast_fp16")];
            string x_73_mode_0 = const()[name = string("x_73_mode_0"), val = string("EXACT")];
            tensor<fp16, [1, ?, 1536]> x_73_cast_fp16 = gelu(mode = x_73_mode_0, x = linear_30_cast_fp16)[name = string("x_73_cast_fp16")];
            tensor<fp16, [384, 1536]> var_884_to_fp16 = const()[name = string("op_884_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57120000)))];
            tensor<fp16, [384]> var_885_to_fp16 = const()[name = string("op_885_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58299712)))];
            tensor<fp16, [1, ?, 384]> linear_31_cast_fp16 = linear(bias = var_885_to_fp16, weight = var_884_to_fp16, x = x_73_cast_fp16)[name = string("linear_31_cast_fp16")];
            tensor<fp16, [1, ?, 384]> x_75_cast_fp16 = add(x = x_69_cast_fp16, y = linear_31_cast_fp16)[name = string("x_75_cast_fp16")];
            tensor<int32, [1]> var_898_axes_0 = const()[name = string("op_898_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [384]> ln_weight_to_fp16 = const()[name = string("ln_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58300544)))];
            tensor<fp16, [384]> ln_bias_to_fp16 = const()[name = string("ln_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58301376)))];
            fp16 var_889_to_fp16 = const()[name = string("op_889_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, ?, 384]> var_898_cast_fp16 = layer_norm(axes = var_898_axes_0, beta = ln_bias_to_fp16, epsilon = var_889_to_fp16, gamma = ln_weight_to_fp16, x = x_75_cast_fp16)[name = string("op_898_cast_fp16")];
            tensor<fp16, [51865]> var_908_bias_0_to_fp16 = const()[name = string("op_908_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58302208)))];
            tensor<fp16, [1, ?, 51865]> logits = linear(bias = var_908_bias_0_to_fp16, weight = token_embedding_weight_to_fp16, x = var_898_cast_fp16)[name = string("op_908_cast_fp16")];
        } -> (logits);
}