|
{ |
|
"_name_or_path": "/project2/lichanghao_w/lch/retrain-heyna/checkpoint-15000", |
|
"activation_freq": 10, |
|
"architectures": [ |
|
"PlantGLMForCausalLM" |
|
], |
|
"auto_map": { |
|
"AutoConfig": "configuration_plantglm.PlantGLMConfig", |
|
"AutoModel": "modeling_plantglm.PlantGLMModel", |
|
"AutoModelForCausalLM": "modeling_plantglm.PlantGLMForCausalLM", |
|
"AutoModelForSequenceClassification": "modeling_plantglm.PlantGLMForSequenceClassification" |
|
}, |
|
"d_inner": 3072, |
|
"d_model": 1024, |
|
"emb_dim": 9, |
|
"embed_dropout": 0.1, |
|
"filter_order": 256, |
|
"hyena_dropout": 0.0, |
|
"hyena_filter_dropout": 0.0, |
|
"hyena_order": 2, |
|
"initializer_range": 0.02, |
|
"max_seq_len": 65538, |
|
"model_type": "plantglm", |
|
"n_layer": 16, |
|
"num_inner_mlps": 2, |
|
"pad_token_id": 0, |
|
"pad_vocab_size_multiple": 8, |
|
"rms_norm_epsilon": 1e-06, |
|
"short_filter_order": 3, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float32", |
|
"train_freq": true, |
|
"transformers_version": "4.44.2", |
|
"use_bias": true, |
|
"vocab_size": 24 |
|
} |
|
|