{
  "architectures": [
    "PerceiverImageClassifier"
  ],
  "id2label": {
    "0": 0,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
    "6": 6,
    "7": 7,
    "8": 8,
    "9": 9
  },
  "label2id": {
    "0": 0,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
    "6": 6,
    "7": 7,
    "8": 8,
    "9": 9
  },
  "model_config": {
    "activation_checkpointing": false,
    "activation_offloading": false,
    "decoder": {
      "cross_attention_residual": true,
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "freeze": false,
      "init_scale": 0.1,
      "num_classes": 10,
      "num_cross_attention_heads": 1,
      "num_cross_attention_qk_channels": null,
      "num_cross_attention_v_channels": null,
      "num_output_queries": 1,
      "num_output_query_channels": 128
    },
    "encoder": {
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "first_cross_attention_layer_shared": false,
      "first_self_attention_block_shared": false,
      "freeze": false,
      "image_shape": [
        28,
        28,
        1
      ],
      "init_scale": 0.1,
      "num_cross_attention_heads": 1,
      "num_cross_attention_layers": 2,
      "num_cross_attention_qk_channels": null,
      "num_cross_attention_v_channels": null,
      "num_frequency_bands": 32,
      "num_self_attention_blocks": 3,
      "num_self_attention_heads": 8,
      "num_self_attention_layers_per_block": 3,
      "num_self_attention_qk_channels": null,
      "num_self_attention_v_channels": null,
      "self_attention_widening_factor": 1
    },
    "num_latent_channels": 128,
    "num_latents": 32
  },
  "model_type": "perceiver-io-image-classifier",
  "torch_dtype": "float32",
  "transformers_version": "4.30.2"
}