shantipriya committed
Commit dd669cb · verified · 1 Parent(s): 2baff7a

Update config.json

Files changed (1):
  1. config.json +15 -23
config.json CHANGED
@@ -1,28 +1,20 @@
 {
   "architectures": [
-    "MiniMindLM"
+    "Transformer"
   ],
-  "aux_loss_alpha": 0.1,
-  "dim": 512,
-  "dropout": 0.0,
-  "flash_attn": true,
-  "hidden_dim": 1408,
-  "max_seq_len": 512,
   "model_type": "minimind",
-  "multiple_of": 64,
-  "n_heads": 8,
-  "n_kv_heads": 2,
-  "n_layers": 8,
-  "n_routed_experts": 4,
-  "n_shared_experts": true,
-  "norm_eps": 1e-05,
-  "norm_topk_prob": true,
-  "num_experts_per_tok": 2,
-  "rope_theta": 1000000.0,
-  "scoring_func": "softmax",
-  "seq_aux": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.49.0",
-  "use_moe": false,
-  "vocab_size": 6400
+  "hidden_size": 512,
+  "num_hidden_layers": 8,
+  "num_attention_heads": 16,
+  "vocab_size": 6400,
+  "max_position_embeddings": 512,
+  "intermediate_size": 2048,
+  "hidden_dropout_prob": 0.0,
+  "attention_probs_dropout_prob": 0.0,
+  "initializer_range": 0.02,
+  "layer_norm_eps": 1e-12,
+  "pad_token_id": 0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "use_cache": true
 }
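
This commit swaps MiniMind's custom keys (dim, n_layers, n_heads, hidden_dim, ...) for transformers-style names (hidden_size, num_hidden_layers, num_attention_heads, intermediate_size, ...). As a minimal sanity check of the updated file, a short Python sketch (the path and assertions below are illustrative, not part of this repo):

    import json

    # Illustrative check only: load the updated config.json from the repo root.
    with open("config.json") as f:
        cfg = json.load(f)

    # Head dimension must divide evenly: 512 hidden / 16 heads = 32 dims per head.
    assert cfg["hidden_size"] % cfg["num_attention_heads"] == 0

    # Special-token ids (pad=0, bos=1, eos=2) must fall inside the 6400-entry vocab.
    for key in ("pad_token_id", "bos_token_id", "eos_token_id"):
        assert 0 <= cfg[key] < cfg["vocab_size"]

    print(cfg["model_type"], "-",
          cfg["num_hidden_layers"], "layers,",
          cfg["hidden_size"], "hidden,",
          cfg["num_attention_heads"], "heads")

Against the file above this prints: minimind - 8 layers, 512 hidden, 16 heads. Note that "minimind" is not a built-in transformers model_type, so loading via transformers.AutoConfig would additionally require the repo to ship custom model code and trust_remote_code=True.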