File size: 1,691 Bytes
b31b5bf 64f06f5 b31b5bf f6a8ccf b31b5bf 64f06f5 b31b5bf 64f06f5 f6a8ccf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf 64f06f5 b31b5bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
{
"architectures": [
"Gemma3ForConditionalGeneration"
],
"boi_token_index": 255999,
"bos_token_id": 2,
"eoi_token_index": 256000,
"eos_token_id": 106,
"image_token_index": 262144,
"initializer_range": 0.02,
"mm_tokens_per_image": 256,
"model_type": "gemma3",
"pad_token_id": 0,
"text_config": {
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"cache_implementation": "hybrid",
"eos_token_id": 106,
"final_logit_softcapping": null,
"head_dim": 128,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 5376,
"initializer_range": 0.02,
"intermediate_size": 21504,
"max_position_embeddings": 131072,
"model_type": "gemma3_text",
"num_attention_heads": 32,
"num_hidden_layers": 62,
"num_key_value_heads": 16,
"query_pre_attn_scalar": 168,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000.0,
"rope_scaling": {
"factor": 8.0,
"rope_type": "linear"
},
"rope_theta": 1000000.0,
"sliding_window": 1024,
"sliding_window_pattern": 6,
"torch_dtype": "bfloat16",
"use_cache": true,
"vocab_size": 262208
},
"torch_dtype": "bfloat16",
"transformers_version": "4.50.0.dev0",
"unsloth_fixed": true,
"vision_config": {
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 896,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 27,
"patch_size": 14,
"torch_dtype": "bfloat16",
"vision_use_head": false
}
}
|