{ "data": { "augmentations": { "blur_p": 0.2, "flip_p": 0.5, "gamma_p": 0.8, "grayscale_p": 0.2, "jitter_p": 0.8, "random_blur": 2.0, "random_gamma": 0.2, "random_jitter": 0.4, "random_scale": 2.0, "shape_constraints": { "height_min": 15, "pixels_max": 600000, "pixels_min": 200000, "ratio_bounds": [ 0.5, 2.5 ], "sample": true, "shape_mult": 14, "width_min": 15 }, "test_context": 1.0 }, "crop": "garg", "data_root": "datasets", "image_shape": [ 480, 640 ], "normalization": "imagenet", "num_copies": 2, "num_frames": 1, "sampling": { "ETH3D": 1.0, "Waymo": 1.0 }, "train_datasets": [ "ETH3D", "Waymo" ], "val_datasets": [ "IBims" ] }, "eps": 1e-06, "generic": { "deterministic": true, "seed": 13 }, "model": { "expansion": 4, "layer_scale": 1.0, "name": "UniDepthV2", "num_heads": 8, "pixel_decoder": { "depths": [ 2, 2, 2 ], "dropout": 0.0, "hidden_dim": 512, "kernel_size": 3, "name": "Decoder", "out_dim": 64 }, "pixel_encoder": { "cls_token_embed_dims": [ 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ], "depths": [ 6, 12, 18, 24 ], "embed_dim": 1024, "embed_dims": [ 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ], "freeze_norm": true, "frozen_stages": 0, "lr": 2e-06, "name": "dinov2_vitl14", "num_register_tokens": 0, "output_idx": [ 6, 12, 18, 24 ], "patch_size": 14, "pretrained": null, "stacking_fn": "last", "use_norm": true, "wd": 0.1 } }, "training": { "batch_size": 8, "clipping": 1.0, "cycle_beta": false, "drop_path": 0.0, "ema": true, "f16": true, "ld": 1.0, "losses": { "camera": { "alpha": 1.0, "fn": "l2", "gamma": 1.0, "input_fn": "linear", "name": "Regression", "output_fn": "sqrt", "weight": 0.25 }, "confidence": { "alpha": 1.0, "fn": "l1", "gamma": 1.0, "input_fn": "linear", "name": "Regression", "output_fn": "sqrt", "weight": 0.1 }, "depth": { "dims": [ -2, -1 ], "input_fn": "log", "integrated": 0.15, "name": "SILog", "output_fn": "sqrt", "weight": 1.0 }, "invariance": { "name": "SelfDistill", "output_fn": "sqrt", "weight": 0.1 }, "ssi": { "input_fn": "log1i", "min_samples": 6, "name": "EdgeGuidedLocalSSI", "output_fn": "sqrt", "use_global": true, "weight": 1.0 } }, "lr": 0.0001, "lr_final": 1e-06, "lr_warmup": 1.0, "n_iters": 300000, "nsteps_accumulation_gradient": 2, "use_checkpoint": false, "validation_interval": 2, "warmup_iters": 75000, "wd": 0.1, "wd_final": 0.1 } }