{
  "architectures": [
    "transformerModel"
  ],
  "auto_map": {
    "AutoConfig": "modeling_transformer.transformerConfig",
    "AutoModelForCausalLM": "modeling_transformer.transformerModel"
  },
  "batch_size": 64,
  "dropout": 0.1,
  "ffn_num_hiddens": 64,
  "ffn_num_input": 32,
  "key_size": 32,
  "lr": 0.005,
  "model_type": "custom_transformer",
  "norm_shape": [
    32
  ],
  "num_epochs": 200,
  "num_heads": 4,
  "num_hiddens": 32,
  "num_layers": 2,
  "num_steps": 10,
  "query_size": 32,
  "src_vocab_len": 184,
  "tgt_vocab": 201,
  "torch_dtype": "float32",
  "transformers_version": "4.45.2",
  "value_size": 32
}