iantc104's picture
Push model using huggingface_hub.
34530d5 verified
{
"act_attn_layers": null,
"act_attn_loss_weights": null,
"act_attn_sigma": 25.0,
"chunk_size": 33,
"dim_feedforward": 3200,
"dim_gaze_decoder": 512,
"dim_gaze_decoder_feedforward": 3200,
"dim_model": 512,
"dinov2_attn_layers": null,
"dinov2_attn_loss_weights": null,
"dinov2_attn_sigma": 25.0,
"dropout": 0.1,
"eyes": {
"observation.left_eye": "observation.images.zed_cam_left"
},
"feedforward_activation": "relu",
"freeze_backbone": false,
"gaze_loss_weight": 1.0,
"gaze_sigma": 50.0,
"image_size": [
480,
640
],
"input_normalization_modes": {
"observation.images.left_eye_cam": "mean_std",
"observation.images.right_eye_cam": "mean_std",
"observation.state": "mean_std"
},
"input_shapes": {
"observation.images.left_eye_cam": [
3,
480,
640
],
"observation.images.right_eye_cam": [
3,
480,
640
],
"observation.state": [
21
]
},
"kl_weight": 10.0,
"latent_dim": 32,
"n_action_steps": 33,
"n_decoder_layers": 1,
"n_encoder_layers": 4,
"n_gaze_decoder_layers": 1,
"n_heads": 8,
"n_obs_steps": 1,
"n_vae_encoder_layers": 4,
"output_normalization_modes": {
"action": "mean_std"
},
"output_shapes": {
"action": [
21
]
},
"pre_norm": false,
"pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
"replace_final_stride_with_dilation": false,
"temporal_ensemble_coeff": null,
"use_act_attn": false,
"use_dinov2_attn": false,
"use_gaze": false,
"use_vae": true,
"vision_backbone": "resnet18"
}