File size: 1,274 Bytes
{
  "_name_or_path": "huihui-ai/Llama-3.2-3B-Instruct-abliterated",
  "model_type": "llama",
  "torch_dtype": "float16",
  "transformers_version": "4.46.3",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "vocab_size": 128256,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "pad_token_id": 128009,
  "hidden_size": 3072,
  "intermediate_size": 8192,
  "num_hidden_layers": 28,
  "num_attention_heads": 24,
  "num_key_value_heads": 8,
  "head_dim": 128,
  "max_position_embeddings": 131072,
  "rope_scaling": {
    "type": "linear",
    "factor": 32
  },
  "rope_theta": 500000,
  "hidden_act": "silu",
  "initializer_range": 0.02,
  "rms_norm_eps": 0.00001,
  "layer_norm_eps": 0.00001,
  "attention_bias": false,
  "mlp_bias": false,
  "pretraining_tp": 1,
  "tie_word_embeddings": true,
  "use_cache": true,
  "shared_input_output_embedding": true,
  "attention_dropout": 0,
  "hidden_dropout_prob": 0.1,
  "attn_dropout_prob": 0.1,
  "residual_dropout_prob": 0,
  "ffn_dropout_prob": 0.1,
  "output_projection": true,
  "position_embedding_type": "rotary",
  "use_rope": true,
  "gradient_checkpointing": false,
  "multi_query_attention": false,
  "output_attentions": false,
  "output_hidden_states": false,
  "layerdrop": 0
}
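
For reference, a minimal sketch of loading this configuration and the corresponding model with the Hugging Face transformers library. The repository id is taken from the "_name_or_path" field above and the float16 dtype from "torch_dtype"; the device placement and the sample prompt are illustrative assumptions, not part of the file.

# Minimal sketch: inspect the config.json shown above and run the model.
# Assumes `transformers` and `torch` are installed; prompt/dtype choices are illustrative.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "huihui-ai/Llama-3.2-3B-Instruct-abliterated"

# The config object mirrors the JSON fields above (28 layers, 24 heads, 8 KV heads).
config = AutoConfig.from_pretrained(repo_id)
print(config.num_hidden_layers, config.num_attention_heads, config.num_key_value_heads)

# Load tokenizer and weights using the dtype declared in the config.
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16)

# Quick generation check using the model's chat template.
messages = [{"role": "user", "content": "Hello!"}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
outputs = model.generate(inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))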