yeongha
/

vilt_finetuned_200

Visual Question Answering

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

vilt_finetuned_200 / config.json

yeongha's picture

End of training

1a6624c verified 10 months ago

history blame contribute delete

1.32 kB

	{
	"_name_or_path": "dandelin/vilt-b32-mlm",
	"architectures": [
	"ViltForQuestionAnswering"
	],
	"attention_probs_dropout_prob": 0.0,
	"hidden_act": "gelu",
	"hidden_dropout_prob": 0.0,
	"hidden_size": 768,
	"id2label": {
	"0": "skateboard",
	"1": "down",
	"2": "no",
	"3": "chopsticks",
	"4": "at table",
	"5": "table",
	"6": "picnic table",
	"7": "crossing",
	"8": "walking",
	"9": "shrimp",
	"10": "ice cream",
	"11": "4",
	"12": "yes",
	"13": "watching",
	"14": "1"
	},
	"image_size": 384,
	"initializer_range": 0.02,
	"intermediate_size": 3072,
	"label2id": {
	"1": 14,
	"4": 11,
	"at table": 4,
	"chopsticks": 3,
	"crossing": 7,
	"down": 1,
	"ice cream": 10,
	"no": 2,
	"picnic table": 6,
	"shrimp": 9,
	"skateboard": 0,
	"table": 5,
	"walking": 8,
	"watching": 13,
	"yes": 12
	},
	"layer_norm_eps": 1e-12,
	"max_image_length": -1,
	"max_position_embeddings": 40,
	"modality_type_vocab_size": 2,
	"model_type": "vilt",
	"num_attention_heads": 12,
	"num_channels": 3,
	"num_hidden_layers": 12,
	"num_images": -1,
	"patch_size": 32,
	"qkv_bias": true,
	"tie_word_embeddings": false,
	"torch_dtype": "float32",
	"transformers_version": "4.40.1",
	"type_vocab_size": 2,
	"vocab_size": 30522
	}