dacorvo HF Staff commited on
Commit
53085a9
·
verified ·
1 Parent(s): 45a603c

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +21 -0
  2. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/2e075469bb7e246e19de.json +79 -0
  3. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/63936687f0615be0c4b5.json +79 -0
  4. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/67d11d4985c28ac7d5ff.json +79 -0
  5. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/cd03fa772b07fbb454c0.json +79 -0
  6. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/e5eddc28add59e2ee9d6.json +79 -0
  7. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/21c419fc8bcc3104cc37.json +79 -0
  8. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/e14cd546176a864ee18b.json +79 -0
  9. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/e99ce6fcf5be305e9424.json +79 -0
  10. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/1cacc2cc42865a8b000d.json +79 -0
  11. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/619f2f5356bdf0f59205.json +79 -0
  12. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/67bc8c12b9b1221b38ad.json +79 -0
  13. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0f44022ffda5d90427b3.json +72 -0
  14. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/1471022e22b5b8b3de3b.json +72 -0
  15. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c9c911d79dbddb873dcf.json +72 -0
  16. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/ibm-granite/granite-3.1-2b-instruct/b66dbd045f3e73eb7427.json +72 -0
  17. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/20b7299fc104cc69ce0f.json +76 -0
  18. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/502c5523ff7585cd3287.json +76 -0
  19. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/9c27c3f4a94313192c63.json +76 -0
  20. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/f0cd6ae3a57125c25463.json +76 -0
  21. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/unsloth/Llama-3.2-1B-Instruct/d4cdf18983e9784091ec.json +77 -0
  22. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/34134d6beddf69d6e6c0.json +72 -0
  23. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/4625dfce3a6ec89cfa42.json +72 -0
  24. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/f790967cf96542a801bc.json +72 -0
  25. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/c3b4f930a6c51ad40028.json +76 -0
  26. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/63d7815d3de40f55a3ae.json +73 -0
  27. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/892d9f2862662e5407c6.json +73 -0
  28. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/e97f2d9bfc450d08ef3b.json +73 -0
  29. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/Qwen/Qwen2.5-0.5B/00b909b9addfb3e82c75.json +96 -0
  30. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/Qwen/Qwen2.5-0.5B/8a714805c0774dccda3d.json +96 -0
  31. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/4be7e684c15f704cb67d.json +78 -0
  32. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6cead920a0dbb3daefb9.json +78 -0
  33. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/cd761b198c1bec21bd55.json +78 -0
  34. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3/Qwen/Qwen3-0.6B/2ed7a8812dbe9c7ab058.json +101 -0
  35. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8fff447965fd3c157c0f.json +79 -0
  36. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/90f9e02faff8566070cb.json +79 -0
  37. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/f6c02365716b13dea692.json +79 -0
  38. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/60de13af0adf5a679b2c.json +148 -0
  39. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev3/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/fc72a21b6de27e9bcefe.json +79 -0
  40. neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/compile_flags.json +1 -0
  41. neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.done +0 -0
  42. neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.hlo_module.pb +3 -0
  43. neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.neff +0 -0
  44. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/compile_flags.json +1 -0
  45. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.done +0 -0
  46. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.hlo_module.pb +3 -0
  47. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.neff +3 -0
  48. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/wrapped_neff.hlo +3 -0
  49. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b67a734fc55d67768bd+253d6470/compile_flags.json +1 -0
  50. neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b67a734fc55d67768bd+253d6470/model.done +0 -0
.gitattributes CHANGED
@@ -10618,3 +10618,24 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bac481d29ac1829a8af+253d6470/model.neff f
10618
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_2052cf656c210489741d+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
10619
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_b9a3b941bc75ca360b60+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10620
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_b9a3b941bc75ca360b60+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10618
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_2052cf656c210489741d+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
10619
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_b9a3b941bc75ca360b60+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10620
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_b9a3b941bc75ca360b60+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10621
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10622
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10623
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b67a734fc55d67768bd+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10624
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_184fcc045921c74845be+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10625
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_184fcc045921c74845be+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10626
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d0c9d2357e7888e46cd+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10627
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_20ce63f489314924a057+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10628
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_40f6b4304be01d72d956+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10629
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_64f5633fb25cfe2548d6+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10630
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_806ef7cec3a07d99b574+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10631
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_806ef7cec3a07d99b574+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10632
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9f17b2a2f6b76b4ae4a8+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10633
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b522f7aabb6b091bde5f+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
10634
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b72befbd04b196887e2e+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10635
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b72befbd04b196887e2e+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10636
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c5daaeaa40996f359a5c+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10637
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c5daaeaa40996f359a5c+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10638
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_df21c2b35cb01f251b78+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10639
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_df21c2b35cb01f251b78+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
10640
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f561bf7d12efdc67186d+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
10641
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f561bf7d12efdc67186d+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/2e075469bb7e246e19de.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 1,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 6144,
16
+ "max_position_embeddings": 262144,
17
+ "max_window_layers": 48,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 768,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 8,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
29
+ "checkpoint_revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 8,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 8,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 8,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 32,
65
+ "num_experts": 128,
66
+ "num_experts_per_tok": 8,
67
+ "num_hidden_layers": 48,
68
+ "num_key_value_heads": 4,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000000,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": false,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/63936687f0615be0c4b5.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 1024,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 1024,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 1024,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/67d11d4985c28ac7d5ff.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/cd03fa772b07fbb454c0.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/e5eddc28add59e2ee9d6.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 2,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 2,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/21c419fc8bcc3104cc37.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tiny-random/qwen3-moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 2,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "tiny-random/qwen3-moe",
29
+ "checkpoint_revision": "10a349dcb488b10c27aa4a3c1dbefb74c41565c3",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 2,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/e14cd546176a864ee18b.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tiny-random/qwen3-moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "tiny-random/qwen3-moe",
29
+ "checkpoint_revision": "10a349dcb488b10c27aa4a3c1dbefb74c41565c3",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/tiny-random/qwen3-moe/e99ce6fcf5be305e9424.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tiny-random/qwen3-moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "tiny-random/qwen3-moe",
29
+ "checkpoint_revision": "10a349dcb488b10c27aa4a3c1dbefb74c41565c3",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/1cacc2cc42865a8b000d.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen3-moe-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 2,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "yujiepan/qwen3-moe-tiny-random",
29
+ "checkpoint_revision": "fb6c5ee2a2c19bd9aced6d9afd8a858966a7bb7e",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 2,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/619f2f5356bdf0f59205.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen3-moe-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "yujiepan/qwen3-moe-tiny-random",
29
+ "checkpoint_revision": "fb6c5ee2a2c19bd9aced6d9afd8a858966a7bb7e",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev1/qwen3_moe/yujiepan/qwen3-moe-tiny-random/67bc8c12b9b1221b38ad.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen3-moe-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "yujiepan/qwen3-moe-tiny-random",
29
+ "checkpoint_revision": "fb6c5ee2a2c19bd9aced6d9afd8a858966a7bb7e",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev1",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0f44022ffda5d90427b3.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "bfloat16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "bfloat16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 4,
65
+ "residual_multiplier": 1.0,
66
+ "rms_norm_eps": 1e-06,
67
+ "rope_scaling": null,
68
+ "rope_theta": 10000.0,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 49152
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/1471022e22b5b8b3de3b.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "float16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 4,
65
+ "residual_multiplier": 1.0,
66
+ "rms_norm_eps": 1e-06,
67
+ "rope_scaling": null,
68
+ "rope_theta": 10000.0,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 49152
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c9c911d79dbddb873dcf.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "float16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 4,
65
+ "residual_multiplier": 1.0,
66
+ "rms_norm_eps": 1e-06,
67
+ "rope_scaling": null,
68
+ "rope_theta": 10000.0,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 49152
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/granite/ibm-granite/granite-3.1-2b-instruct/b66dbd045f3e73eb7427.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "bfloat16",
53
+ "sequence_length": 4096,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "bfloat16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 32,
63
+ "num_hidden_layers": 40,
64
+ "num_key_value_heads": 8,
65
+ "residual_multiplier": 0.22,
66
+ "rms_norm_eps": 1e-05,
67
+ "rope_scaling": null,
68
+ "rope_theta": 5000000.0,
69
+ "tie_word_embeddings": true,
70
+ "use_cache": true,
71
+ "vocab_size": 49155
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/20b7299fc104cc69ce0f.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "bfloat16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "bfloat16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "pretraining_tp": 1,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": {
66
+ "factor": 8.0,
67
+ "high_freq_factor": 4.0,
68
+ "low_freq_factor": 1.0,
69
+ "original_max_position_embeddings": 8192,
70
+ "rope_type": "llama3"
71
+ },
72
+ "rope_theta": 500000.0,
73
+ "tie_word_embeddings": false,
74
+ "use_cache": true,
75
+ "vocab_size": 128256
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/502c5523ff7585cd3287.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 131072,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 131072,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "bfloat16",
51
+ "sequence_length": 131072,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "bfloat16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "pretraining_tp": 1,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": {
66
+ "factor": 8.0,
67
+ "high_freq_factor": 4.0,
68
+ "low_freq_factor": 1.0,
69
+ "original_max_position_embeddings": 8192,
70
+ "rope_type": "llama3"
71
+ },
72
+ "rope_theta": 500000.0,
73
+ "tie_word_embeddings": false,
74
+ "use_cache": true,
75
+ "vocab_size": 128256
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/9c27c3f4a94313192c63.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "float16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "pretraining_tp": 1,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": {
66
+ "factor": 8.0,
67
+ "high_freq_factor": 4.0,
68
+ "low_freq_factor": 1.0,
69
+ "original_max_position_embeddings": 8192,
70
+ "rope_type": "llama3"
71
+ },
72
+ "rope_theta": 500000.0,
73
+ "tie_word_embeddings": false,
74
+ "use_cache": true,
75
+ "vocab_size": 128256
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/llamafactory/tiny-random-Llama-3/f0cd6ae3a57125c25463.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "float16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 4,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 4,
63
+ "pretraining_tp": 1,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": {
66
+ "factor": 8.0,
67
+ "high_freq_factor": 4.0,
68
+ "low_freq_factor": 1.0,
69
+ "original_max_position_embeddings": 8192,
70
+ "rope_type": "llama3"
71
+ },
72
+ "rope_theta": 500000.0,
73
+ "tie_word_embeddings": false,
74
+ "use_cache": true,
75
+ "vocab_size": 128256
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/llama/unsloth/Llama-3.2-1B-Instruct/d4cdf18983e9784091ec.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "float16",
51
+ "sequence_length": 4096,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 32,
61
+ "num_hidden_layers": 16,
62
+ "num_key_value_heads": 8,
63
+ "pretraining_tp": 1,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_scaling": {
66
+ "factor": 32.0,
67
+ "high_freq_factor": 4.0,
68
+ "low_freq_factor": 1.0,
69
+ "original_max_position_embeddings": 8192,
70
+ "rope_type": "llama3"
71
+ },
72
+ "rope_theta": 500000.0,
73
+ "tie_word_embeddings": true,
74
+ "unsloth_fixed": true,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/34134d6beddf69d6e6c0.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qkv_kernel_enabled": false,
48
+ "rpl_reduce_dtype": "float16",
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 32,
59
+ "num_experts_per_tok": 2,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 8,
62
+ "num_local_experts": 8,
63
+ "output_router_logits": false,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_theta": 10000.0,
66
+ "router_aux_loss_coef": 0.001,
67
+ "router_jitter_noise": 0.0,
68
+ "sliding_window": 4096,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 32000
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/4625dfce3a6ec89cfa42.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qkv_kernel_enabled": false,
48
+ "rpl_reduce_dtype": "bfloat16",
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "bfloat16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 32,
59
+ "num_experts_per_tok": 2,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 8,
62
+ "num_local_experts": 8,
63
+ "output_router_logits": false,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_theta": 10000.0,
66
+ "router_aux_loss_coef": 0.001,
67
+ "router_jitter_noise": 0.0,
68
+ "sliding_window": 4096,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 32000
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/mixtral/dacorvo/Mixtral-tiny/f790967cf96542a801bc.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.1.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qkv_kernel_enabled": false,
48
+ "rpl_reduce_dtype": "float16",
49
+ "sequence_length": 100,
50
+ "sequence_parallel_enabled": false,
51
+ "speculation_length": 0,
52
+ "start_rank_id": 0,
53
+ "target": null,
54
+ "torch_dtype": "float16",
55
+ "tp_degree": 2,
56
+ "vocab_parallel": false
57
+ },
58
+ "num_attention_heads": 32,
59
+ "num_experts_per_tok": 2,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 8,
62
+ "num_local_experts": 8,
63
+ "output_router_logits": false,
64
+ "rms_norm_eps": 1e-05,
65
+ "rope_theta": 10000.0,
66
+ "router_aux_loss_coef": 0.001,
67
+ "router_jitter_noise": 0.0,
68
+ "sliding_window": 4096,
69
+ "tie_word_embeddings": false,
70
+ "use_cache": true,
71
+ "vocab_size": 32000
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/c3b4f930a6c51ad40028.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 4,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
29
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
30
+ "continuous_batching": true,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": true,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 4,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": true,
48
+ "optimum_neuron_version": "0.3.1.dev2",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 32,
65
+ "num_key_value_heads": 32,
66
+ "original_max_position_embeddings": 4096,
67
+ "partial_rotary_factor": 1.0,
68
+ "resid_pdrop": 0.0,
69
+ "rms_norm_eps": 1e-05,
70
+ "rope_scaling": null,
71
+ "rope_theta": 10000.0,
72
+ "sliding_window": 2047,
73
+ "tie_word_embeddings": false,
74
+ "use_cache": true,
75
+ "vocab_size": 32064
76
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/63d7815d3de40f55a3ae.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "float16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 1,
63
+ "original_max_position_embeddings": 16384,
64
+ "partial_rotary_factor": 1.0,
65
+ "resid_pdrop": 0.0,
66
+ "rms_norm_eps": 1e-05,
67
+ "rope_scaling": null,
68
+ "rope_theta": 250000,
69
+ "sliding_window": null,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 100352
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/892d9f2862662e5407c6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "float16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "float16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 1,
63
+ "original_max_position_embeddings": 16384,
64
+ "partial_rotary_factor": 1.0,
65
+ "resid_pdrop": 0.0,
66
+ "rms_norm_eps": 1e-05,
67
+ "rope_scaling": null,
68
+ "rope_theta": 250000,
69
+ "sliding_window": null,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 100352
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/phi3/yujiepan/phi-4-tiny-random/e97f2d9bfc450d08ef3b.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.1.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qkv_kernel_enabled": false,
50
+ "rpl_reduce_dtype": "bfloat16",
51
+ "sequence_length": 100,
52
+ "sequence_parallel_enabled": false,
53
+ "speculation_length": 0,
54
+ "start_rank_id": 0,
55
+ "target": null,
56
+ "torch_dtype": "bfloat16",
57
+ "tp_degree": 2,
58
+ "vocab_parallel": false
59
+ },
60
+ "num_attention_heads": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 1,
63
+ "original_max_position_embeddings": 16384,
64
+ "partial_rotary_factor": 1.0,
65
+ "resid_pdrop": 0.0,
66
+ "rms_norm_eps": 1e-05,
67
+ "rope_scaling": null,
68
+ "rope_theta": 250000,
69
+ "sliding_window": null,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 100352
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/Qwen/Qwen2.5-0.5B/00b909b9addfb3e82c75.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "max_window_layers": 24,
41
+ "model_type": "qwen2",
42
+ "neuron": {
43
+ "_serialized_key": "NxDNeuronConfig",
44
+ "async_mode": false,
45
+ "attn_kernel_enabled": false,
46
+ "batch_size": 4,
47
+ "capacity_factor": null,
48
+ "cc_pipeline_tiling_factor": 2,
49
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
50
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
51
+ "continuous_batching": true,
52
+ "enable_bucketing": false,
53
+ "ep_degree": 1,
54
+ "flash_decoding_enabled": false,
55
+ "fused_qkv": false,
56
+ "glu_mlp": true,
57
+ "is_chunked_prefill": false,
58
+ "local_ranks_size": 2,
59
+ "logical_nc_config": 1,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 4096,
62
+ "max_topk": 256,
63
+ "mlp_kernel_enabled": false,
64
+ "mlp_kernel_fuse_residual_add": false,
65
+ "n_active_tokens": 4096,
66
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
67
+ "num_cores_per_group": 1,
68
+ "on_device_sampling": false,
69
+ "optimum_neuron_version": "0.3.1.dev2",
70
+ "output_logits": false,
71
+ "padding_side": "right",
72
+ "pp_degree": 1,
73
+ "qkv_kernel_enabled": false,
74
+ "rpl_reduce_dtype": "float16",
75
+ "sequence_length": 4096,
76
+ "sequence_parallel_enabled": false,
77
+ "speculation_length": 0,
78
+ "start_rank_id": 0,
79
+ "target": null,
80
+ "torch_dtype": "float16",
81
+ "tp_degree": 2,
82
+ "vocab_parallel": false
83
+ },
84
+ "num_attention_heads": 14,
85
+ "num_hidden_layers": 24,
86
+ "num_key_value_heads": 2,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000.0,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": true,
92
+ "use_cache": true,
93
+ "use_mrope": false,
94
+ "use_sliding_window": false,
95
+ "vocab_size": 151936
96
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/Qwen/Qwen2.5-0.5B/8a714805c0774dccda3d.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "max_window_layers": 24,
41
+ "model_type": "qwen2",
42
+ "neuron": {
43
+ "_serialized_key": "NxDNeuronConfig",
44
+ "async_mode": false,
45
+ "attn_kernel_enabled": false,
46
+ "batch_size": 1,
47
+ "capacity_factor": null,
48
+ "cc_pipeline_tiling_factor": 2,
49
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
50
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
51
+ "continuous_batching": false,
52
+ "enable_bucketing": false,
53
+ "ep_degree": 1,
54
+ "flash_decoding_enabled": false,
55
+ "fused_qkv": false,
56
+ "glu_mlp": true,
57
+ "is_chunked_prefill": false,
58
+ "local_ranks_size": 2,
59
+ "logical_nc_config": 1,
60
+ "max_batch_size": 1,
61
+ "max_context_length": 32768,
62
+ "max_topk": 256,
63
+ "mlp_kernel_enabled": false,
64
+ "mlp_kernel_fuse_residual_add": false,
65
+ "n_active_tokens": 32768,
66
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
67
+ "num_cores_per_group": 1,
68
+ "on_device_sampling": true,
69
+ "optimum_neuron_version": "0.3.1.dev2",
70
+ "output_logits": false,
71
+ "padding_side": "right",
72
+ "pp_degree": 1,
73
+ "qkv_kernel_enabled": false,
74
+ "rpl_reduce_dtype": "bfloat16",
75
+ "sequence_length": 32768,
76
+ "sequence_parallel_enabled": false,
77
+ "speculation_length": 0,
78
+ "start_rank_id": 0,
79
+ "target": null,
80
+ "torch_dtype": "bfloat16",
81
+ "tp_degree": 2,
82
+ "vocab_parallel": false
83
+ },
84
+ "num_attention_heads": 14,
85
+ "num_hidden_layers": 24,
86
+ "num_key_value_heads": 2,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000.0,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": true,
92
+ "use_cache": true,
93
+ "use_mrope": false,
94
+ "use_sliding_window": false,
95
+ "vocab_size": 151936
96
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/4be7e684c15f704cb67d.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "float16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 2,
65
+ "rms_norm_eps": 1e-06,
66
+ "rope_scaling": {
67
+ "factor": 4.0,
68
+ "original_max_position_embeddings": 32768,
69
+ "rope_type": "yarn",
70
+ "type": "yarn"
71
+ },
72
+ "rope_theta": 1000000.0,
73
+ "sliding_window": null,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "use_sliding_window": false,
77
+ "vocab_size": 152064
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6cead920a0dbb3daefb9.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "float16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 2,
65
+ "rms_norm_eps": 1e-06,
66
+ "rope_scaling": {
67
+ "factor": 4.0,
68
+ "original_max_position_embeddings": 32768,
69
+ "rope_type": "yarn",
70
+ "type": "yarn"
71
+ },
72
+ "rope_theta": 1000000.0,
73
+ "sliding_window": null,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "use_sliding_window": false,
77
+ "vocab_size": 152064
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/cd761b198c1bec21bd55.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention"
16
+ ],
17
+ "max_position_embeddings": 32768,
18
+ "max_window_layers": 1,
19
+ "model_type": "qwen2",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
28
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": false,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.1.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "bfloat16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "torch_dtype": "bfloat16",
59
+ "tp_degree": 2,
60
+ "vocab_parallel": false
61
+ },
62
+ "num_attention_heads": 4,
63
+ "num_hidden_layers": 2,
64
+ "num_key_value_heads": 2,
65
+ "rms_norm_eps": 1e-06,
66
+ "rope_scaling": {
67
+ "factor": 4.0,
68
+ "original_max_position_embeddings": 32768,
69
+ "rope_type": "yarn",
70
+ "type": "yarn"
71
+ },
72
+ "rope_theta": 1000000.0,
73
+ "sliding_window": null,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "use_sliding_window": false,
77
+ "vocab_size": 152064
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3/Qwen/Qwen3-0.6B/2ed7a8812dbe9c7ab058.json ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention"
44
+ ],
45
+ "max_position_embeddings": 40960,
46
+ "max_window_layers": 28,
47
+ "model_type": "qwen3",
48
+ "neuron": {
49
+ "_serialized_key": "NxDNeuronConfig",
50
+ "async_mode": false,
51
+ "attn_kernel_enabled": false,
52
+ "batch_size": 4,
53
+ "capacity_factor": null,
54
+ "cc_pipeline_tiling_factor": 2,
55
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
56
+ "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
57
+ "continuous_batching": true,
58
+ "enable_bucketing": false,
59
+ "ep_degree": 1,
60
+ "flash_decoding_enabled": false,
61
+ "fused_qkv": true,
62
+ "glu_mlp": true,
63
+ "is_chunked_prefill": false,
64
+ "local_ranks_size": 2,
65
+ "logical_nc_config": 1,
66
+ "max_batch_size": 4,
67
+ "max_context_length": 4096,
68
+ "max_topk": 256,
69
+ "mlp_kernel_enabled": false,
70
+ "mlp_kernel_fuse_residual_add": false,
71
+ "n_active_tokens": 4096,
72
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
73
+ "num_cores_per_group": 1,
74
+ "on_device_sampling": false,
75
+ "optimum_neuron_version": "0.3.1.dev2",
76
+ "output_logits": false,
77
+ "padding_side": "right",
78
+ "pp_degree": 1,
79
+ "qkv_kernel_enabled": false,
80
+ "rpl_reduce_dtype": "bfloat16",
81
+ "sequence_length": 4096,
82
+ "sequence_parallel_enabled": false,
83
+ "speculation_length": 0,
84
+ "start_rank_id": 0,
85
+ "target": null,
86
+ "torch_dtype": "bfloat16",
87
+ "tp_degree": 2,
88
+ "vocab_parallel": false
89
+ },
90
+ "num_attention_heads": 16,
91
+ "num_hidden_layers": 28,
92
+ "num_key_value_heads": 8,
93
+ "rms_norm_eps": 1e-06,
94
+ "rope_scaling": null,
95
+ "rope_theta": 1000000,
96
+ "sliding_window": null,
97
+ "tie_word_embeddings": true,
98
+ "use_cache": true,
99
+ "use_sliding_window": false,
100
+ "vocab_size": 151936
101
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8fff447965fd3c157c0f.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 2,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 2,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev2",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/90f9e02faff8566070cb.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev2",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/f6c02365716b13dea692.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 2,
11
+ "head_dim": 32,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 64,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 128,
16
+ "max_position_embeddings": 40960,
17
+ "max_window_layers": 1,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 128,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 1,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
29
+ "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 1,
40
+ "max_context_length": 100,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 100,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev2",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 2,
65
+ "num_experts": 8,
66
+ "num_experts_per_tok": 2,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 1,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 1000000.0,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": true,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev2/smollm3/HuggingFaceTB/SmolLM3-3B/60de13af0adf5a679b2c.json ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "HuggingFaceTB/SmolLM3-3B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "SmolLM3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 11008,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 65536,
53
+ "max_window_layers": 28,
54
+ "mlp_bias": false,
55
+ "model_type": "smollm3",
56
+ "neuron": {
57
+ "_serialized_key": "NxDNeuronConfig",
58
+ "async_mode": false,
59
+ "attn_kernel_enabled": false,
60
+ "batch_size": 4,
61
+ "capacity_factor": null,
62
+ "cc_pipeline_tiling_factor": 2,
63
+ "checkpoint_id": "HuggingFaceTB/SmolLM3-3B",
64
+ "checkpoint_revision": "1c00fc78bd9cf90108046bc433cb34992480f1c1",
65
+ "continuous_batching": true,
66
+ "enable_bucketing": false,
67
+ "ep_degree": 1,
68
+ "flash_decoding_enabled": false,
69
+ "fused_qkv": true,
70
+ "glu_mlp": true,
71
+ "is_chunked_prefill": false,
72
+ "local_ranks_size": 2,
73
+ "logical_nc_config": 1,
74
+ "max_batch_size": 4,
75
+ "max_context_length": 4096,
76
+ "max_topk": 256,
77
+ "mlp_kernel_enabled": false,
78
+ "mlp_kernel_fuse_residual_add": false,
79
+ "n_active_tokens": 4096,
80
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
81
+ "num_cores_per_group": 1,
82
+ "on_device_sampling": true,
83
+ "optimum_neuron_version": "0.3.1.dev2",
84
+ "output_logits": false,
85
+ "padding_side": "right",
86
+ "pp_degree": 1,
87
+ "qkv_kernel_enabled": false,
88
+ "rpl_reduce_dtype": "bfloat16",
89
+ "sequence_length": 4096,
90
+ "sequence_parallel_enabled": false,
91
+ "speculation_length": 0,
92
+ "start_rank_id": 0,
93
+ "target": null,
94
+ "torch_dtype": "bfloat16",
95
+ "tp_degree": 2,
96
+ "vocab_parallel": false
97
+ },
98
+ "no_rope_layer_interval": 4,
99
+ "no_rope_layers": [
100
+ 1,
101
+ 1,
102
+ 1,
103
+ 0,
104
+ 1,
105
+ 1,
106
+ 1,
107
+ 0,
108
+ 1,
109
+ 1,
110
+ 1,
111
+ 0,
112
+ 1,
113
+ 1,
114
+ 1,
115
+ 0,
116
+ 1,
117
+ 1,
118
+ 1,
119
+ 0,
120
+ 1,
121
+ 1,
122
+ 1,
123
+ 0,
124
+ 1,
125
+ 1,
126
+ 1,
127
+ 0,
128
+ 1,
129
+ 1,
130
+ 1,
131
+ 0,
132
+ 1,
133
+ 1,
134
+ 1,
135
+ 0
136
+ ],
137
+ "num_attention_heads": 16,
138
+ "num_hidden_layers": 36,
139
+ "num_key_value_heads": 4,
140
+ "pretraining_tp": 2,
141
+ "rms_norm_eps": 1e-06,
142
+ "rope_scaling": null,
143
+ "rope_theta": 5000000.0,
144
+ "sliding_window": null,
145
+ "use_cache": false,
146
+ "use_sliding_window": false,
147
+ "vocab_size": 128256
148
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev3/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/fc72a21b6de27e9bcefe.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3MoeForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "decoder_sparse_step": 1,
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 6144,
16
+ "max_position_embeddings": 262144,
17
+ "max_window_layers": 48,
18
+ "mlp_only_layers": [],
19
+ "model_type": "qwen3_moe",
20
+ "moe_intermediate_size": 768,
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 8,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
29
+ "checkpoint_revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
30
+ "continuous_batching": false,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": false,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 8,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 8,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.1.dev3",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 8,
61
+ "vocab_parallel": false
62
+ },
63
+ "norm_topk_prob": true,
64
+ "num_attention_heads": 32,
65
+ "num_experts": 128,
66
+ "num_experts_per_tok": 8,
67
+ "num_hidden_layers": 48,
68
+ "num_key_value_heads": 4,
69
+ "output_router_logits": false,
70
+ "rms_norm_eps": 1e-06,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000000,
73
+ "router_aux_loss_coef": 0.001,
74
+ "sliding_window": null,
75
+ "tie_word_embeddings": false,
76
+ "use_cache": true,
77
+ "use_sliding_window": false,
78
+ "vocab_size": 151936
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a6a5e05e7901f7ec6ea776377316585b1bf917c33884f8a2a2d3851e28128c6
3
+ size 7993
neuronxcc-2.19.8089.0+8ab9f450/MODULE_04376e1307045cf435ae+431f5505/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31c4cdb1fbc97738a6e37dec33d6f44e6b1284cef02f7149efa6cfbe91b480c
3
+ size 63836
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8842daaea9ad716443a64507d5e6eb80bd5036701c69d3cc22d3c8b28cfe3129
3
+ size 297984
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0a47deab436eaf26c99a+cd3419b6/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01904b235c4fe11e10878eb7201accabe15e04a5a33cb5e989bab9b2b9fd2617
3
+ size 307570
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b67a734fc55d67768bd+253d6470/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b67a734fc55d67768bd+253d6470/model.done ADDED
File without changes