{
  "encoders_hidden_size": 768,
  "ignore_index": -100,
  "llm_name": "bilalfaye/gpt",
  "max_inference_batch_size": 32,
  "max_seq_len": 256,
  "modal_tokens": 30,
  "multiple_of": 256,
  "n_head": 8,
  "n_up_layers": 8,
  "up_dim": 768
}