{
  "architectures": [
    "Gemma3ForConditionalGeneration"
  ],
  "boi_token_index": 255999,
  "bos_token_id": 2,
  "eoi_token_index": 256000,
  "eos_token_id": 106,
  "image_token_index": 262144,
  "initializer_range": 0.02,
  "mm_tokens_per_image": 256,
  "model_type": "gemma3",
  "pad_token_id": 0,
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attn_logit_softcapping": null,
    "cache_implementation": "hybrid",
    "eos_token_id": 106,
    "final_logit_softcapping": null,
    "head_dim": 128,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 5376,
    "initializer_range": 0.02,
    "intermediate_size": 21504,
    "max_position_embeddings": 131072,
    "model_type": "gemma3_text",
    "num_attention_heads": 32,
    "num_hidden_layers": 62,
    "num_key_value_heads": 16,
    "query_pre_attn_scalar": 168,
    "rms_norm_eps": 1e-06,
    "rope_local_base_freq": 10000.0,
    "rope_scaling": {
      "factor": 8.0,
      "rope_type": "linear"
    },
    "rope_theta": 1000000.0,
    "sliding_window": 1024,
    "sliding_window_pattern": 6,
    "torch_dtype": "bfloat16",
    "use_cache": true,
    "vocab_size": 262208
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.50.0.dev0",
  "unsloth_fixed": true,
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1152,
    "image_size": 896,
    "intermediate_size": 4304,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 27,
    "patch_size": 14,
    "torch_dtype": "bfloat16",
    "vision_use_head": false
  }
}