# anycam_v1_seq2 / config.yaml
# Last commit by fwimbauer: "Upload config.yaml with huggingface_hub" (42cafd2, verified)
# Optimizer, LR schedule and training-loop bookkeeping.
# NOTE(review): the source text had all indentation stripped; the nesting below
# is reconstructed from key order — confirm against the original file.
training:
  optimizer:
    type: adam
    args:
      lr: 0.0001
      betas:
      - 0.9
      - 0.999
      eps: 1.0e-08
      weight_decay: 0.0
      amsgrad: false
  # presumably the LR is scaled by `gamma` every `step_size` iterations — confirm
  # against the trainer.
  scheduler:
    type: step
    step_size: 100000
    gamma: 0.1
  num_epochs: 1000
  continue: true
  checkpoint_every: 2500
  log_every_iters: 100
  n_saved: 4
  stop_iteration: 250000
# Catalogue of named dataset definitions. Each entry carries a `type`, data
# paths for training/testing, an optional `split`, and per-dataset loading flags.
# NOTE(review): the source text had all indentation stripped; entries are
# delimited here by the keys that had no value and were followed by `type:` —
# confirm against the original file.
dataset_cfgs:
  # --- Sintel (training and testing paths are identical) ---
  sintel:
    type: sintel
    data_path_training: data/Sintel/training
    data_path_testing: data/Sintel/training
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/sintel/training
    preprocessed_path_testing: data/unimatch_flows/sintel/training
    return_depth: false
    return_flow: true
  # Same paths as `sintel`, but with type `sintel-gt` and return_depth enabled.
  sintel_gt:
    type: sintel-gt
    data_path_training: data/Sintel/training
    data_path_testing: data/Sintel/training
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/sintel/training
    preprocessed_path_testing: data/unimatch_flows/sintel/training
    return_depth: true
    return_flow: true
  # --- Waymo ---
  waymo:
    type: waymo
    data_path_training: data/waymo/training
    data_path_testing: data/waymo/testing
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/waymo/training
    preprocessed_path_testing: data/unimatch_flows/waymo/testing
    return_depth: false
    return_flow: true
  # --- Entries restricted by an explicit `split` ---
  re10k_eval_seqs:
    type: re10k
    data_path_training: data/realestate10k/test.pickle
    data_path_testing: data/realestate10k/test.pickle
    split: anycam/datasets/realestate10k/splits/eval_seqs
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/realestate10k
    preprocessed_path_testing: data/unimatch_flows/realestate10k
    return_depth: false
    return_flow: true
  # The TUM RGB-D entries define no preprocessed paths and disable return_flow.
  tumrgbd_eval_seqs:
    type: tumrgbd
    data_path_training: data/TUM_RGBD
    data_path_testing: data/TUM_RGBD
    split: anycam/datasets/tum_rgbd/splits/dynamic_seqs
    image_size: 384
    frame_count: 2
    dilation: 10
    return_depth: false
    return_flow: false
  tumrgbd_eval_seqs_64:
    type: tumrgbd
    data_path_training: data/TUM_RGBD
    data_path_testing: data/TUM_RGBD
    split: anycam/datasets/tum_rgbd/splits/dynamic_seqs_64
    image_size: 384
    frame_count: 2
    dilation: 10
    return_depth: false
    return_flow: false
  waymo_eval_seqs_2_64:
    type: waymo
    data_path_training: data/waymo/training
    data_path_testing: data/waymo/testing
    split: anycam/datasets/waymo/splits/eval_seqs_2_64
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/waymo/training
    preprocessed_path_testing: data/unimatch_flows/waymo/testing
    return_depth: false
    return_flow: true
  # --- RealEstate10K; the `_n2` / `_n3` variants differ in `dilation` (2 / 3)
  # and in the preprocessed-path suffix (`_2` / `_3`) ---
  # NOTE(review): training and testing both point at test.pickle — confirm this
  # is intended.
  re10k:
    type: re10k
    data_path_training: data/realestate10k/test.pickle
    data_path_testing: data/realestate10k/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 1
    preprocessed_path_training: data/unimatch_flows/realestate10k
    preprocessed_path_testing: data/unimatch_flows/realestate10k
    return_depth: false
    return_flow: true
  re10k_n2:
    type: re10k
    data_path_training: data/realestate10k/test.pickle
    data_path_testing: data/realestate10k/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 2
    preprocessed_path_training: data/unimatch_flows/realestate10k_2
    preprocessed_path_testing: data/unimatch_flows/realestate10k_2
    return_depth: false
    return_flow: true
  re10k_n3:
    type: re10k
    data_path_training: data/realestate10k/test.pickle
    data_path_testing: data/realestate10k/test.pickle
    split: null
    image_size: 384
    frame_count: 2
    dilation: 3
    preprocessed_path_training: data/unimatch_flows/realestate10k_3
    preprocessed_path_testing: data/unimatch_flows/realestate10k_3
    return_depth: false
    return_flow: true
  # --- YouTube-VOS; return_flow is false for all three variants ---
  # The base entry tests on `valid`; the `_n2` / `_n3` variants use `train` for
  # both paths.
  youtube_vos:
    type: youtubevos
    data_path_training: data/youtube-vos/train
    data_path_testing: data/youtube-vos/valid
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/youtube-vos/train
    preprocessed_path_testing: data/unimatch_flows/youtube-vos/valid
    return_depth: false
    return_flow: false
  youtube_vos_n2:
    type: youtubevos
    data_path_training: data/youtube-vos/train
    data_path_testing: data/youtube-vos/train
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/youtubevos_2
    preprocessed_path_testing: data/unimatch_flows/youtubevos_2
    return_depth: false
    return_flow: false
    dilation: 2
  youtube_vos_n3:
    type: youtubevos
    data_path_training: data/youtube-vos/train
    data_path_testing: data/youtube-vos/train
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/youtubevos_3
    preprocessed_path_testing: data/unimatch_flows/youtubevos_3
    return_depth: false
    return_flow: false
    dilation: 3
  # --- OpenDV; the base entry reads flows from data/opendv/flows, the variants
  # from data/unimatch_flows/opendv_{2,3} ---
  opendv:
    type: opendv
    data_path_training: data/opendv/frames
    data_path_testing: data/opendv/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/opendv/flows
    preprocessed_path_testing: data/opendv/flows
    return_depth: false
    return_flow: true
  opendv_n2:
    type: opendv
    data_path_training: data/opendv/frames
    data_path_testing: data/opendv/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/opendv_2
    preprocessed_path_testing: data/unimatch_flows/opendv_2
    return_depth: false
    return_flow: true
    dilation: 2
  opendv_n3:
    type: opendv
    data_path_training: data/opendv/frames
    data_path_testing: data/opendv/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/opendv_3
    preprocessed_path_testing: data/unimatch_flows/opendv_3
    return_depth: false
    return_flow: true
    dilation: 3
  # --- WalkingTours ---
  walkingtours:
    type: walkingtours
    data_path_training: data/walkingtours/frames
    data_path_testing: data/walkingtours/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/walkingtours/flows
    preprocessed_path_testing: data/walkingtours/flows
    return_depth: false
    return_flow: true
  walkingtours_n2:
    type: walkingtours
    data_path_training: data/walkingtours/frames
    data_path_testing: data/walkingtours/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/walkingtours_2
    preprocessed_path_testing: data/unimatch_flows/walkingtours_2
    return_depth: false
    return_flow: true
    dilation: 2
  walkingtours_n3:
    type: walkingtours
    data_path_training: data/walkingtours/frames
    data_path_testing: data/walkingtours/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/walkingtours_3
    preprocessed_path_testing: data/unimatch_flows/walkingtours_3
    return_depth: false
    return_flow: true
    dilation: 3
  # --- EPIC-Kitchens ---
  epickitchens:
    type: epickitchens
    data_path_training: data/epickitchens/frames
    data_path_testing: data/epickitchens/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/epickitchens/flows
    preprocessed_path_testing: data/epickitchens/flows
    return_depth: false
    return_flow: true
  epickitchens_n2:
    type: epickitchens
    data_path_training: data/epickitchens/frames
    data_path_testing: data/epickitchens/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/epickitchens_2
    preprocessed_path_testing: data/unimatch_flows/epickitchens_2
    return_depth: false
    return_flow: true
    dilation: 2
  epickitchens_n3:
    type: epickitchens
    data_path_training: data/epickitchens/frames
    data_path_testing: data/epickitchens/frames
    split: null
    image_size: 384
    frame_count: 2
    preprocessed_path_training: data/unimatch_flows/epickitchens_3
    preprocessed_path_testing: data/unimatch_flows/epickitchens_3
    return_depth: false
    return_flow: true
    dilation: 3
# Top-level run settings (plain scalars).
training_type: anycam_training
seed: 0
# NOTE(review): presumably the distributed backend and the number of processes
# launched per node — confirm against the launcher.
backend: nccl
nproc_per_node: 2
with_amp: true
name: anycam_seq2
batch_size: 16
num_workers: 8
# Dataset selection. Every name in both lists matches an entry name defined
# under `dataset_cfgs` in this file.
dataset:
- re10k
- youtube_vos
- opendv
- walkingtours
- epickitchens
# presumably the datasets used for validation — confirm against the trainer.
val_dataset:
- re10k_eval_seqs
- waymo_eval_seqs_2_64
# NOTE(review): the source text had all indentation stripped; these four keys
# are assumed to belong to `dataset_params` — confirm against the original file.
# presumably applied on top of the selected dataset entries; note image_size is
# 336 here while every `dataset_cfgs` entry in this file uses 384 — verify which
# value takes effect.
dataset_params:
  frame_count: 2
  return_flow: true
  image_size: 336
  sequential: true
# NOTE(review): the source text had all indentation stripped; the nesting below
# is reconstructed from key order — confirm against the original file.
dataloading:
  epoch_length: 80000
  # Maps dataset entry names to an integer 1-5; each base dataset shares its
  # value with its `_n2` / `_n3` variants. presumably a stage index — confirm
  # against the data-loading code.
  staged_datasets:
    re10k: 1
    re10k_n2: 1
    re10k_n3: 1
    walkingtours: 2
    walkingtours_n2: 2
    walkingtours_n3: 2
    youtube_vos: 3
    youtube_vos_n2: 3
    youtube_vos_n3: 3
    epickitchens: 4
    epickitchens_n2: 4
    epickitchens_n3: 4
    opendv: 5
    opendv_n2: 5
    opendv_n3: 5
# Output location for this run.
# NOTE(review): the source text had all indentation stripped; `unique_id` is
# assumed to sit under `output` alongside `path` — confirm against the original
# file. It is left as an unquoted integer, as in the source.
output:
  path: pretrained_models/anycam_seq2
  unique_id: 1311123
# List of loss terms; a single `pose_loss` entry is configured.
# NOTE(review): the source text had all indentation stripped; the three keys
# after `type` are assumed to belong to the same list item — confirm against the
# original file.
loss:
- type: pose_loss
  lambda_dist: 0
  pose_token_weight_decay: 0.01
  lambda_fwd_bwd_consistency: 1
# Model configuration: depth predictor, pose predictor, depth aligner, and
# flow / training-direction options.
# NOTE(review): the source text had all indentation stripped. The three keys
# with empty values (depth_predictor, pose_predictor, depth_aligner) are treated
# as sub-sections; where each one ends is inferred — confirm against the
# original file.
model:
  depth_predictor:
    type: unidepth
  # assumes every key up to `depth_aligner` configures the pose predictor —
  # TODO confirm (the `separate_*_candidates` flags could be model-level).
  pose_predictor:
    type: anycam
    focal_parameterization: linlog-candidates
    focal_min: 0.2
    focal_max: 7
    rotation_parameterization: axis-angle
    separate_pose_candidates: true
    separate_uncertainty_candidates: true
  depth_aligner:
    type: identity
  # assumes the remaining keys are model-level rather than part of
  # `depth_aligner` — TODO confirm.
  flow_model: unimatch
  use_provided_flow: true
  use_provided_proj: false
  train_directions: both
  perform_subsampled_pose_pass: false
  subsampling_drop_n: 1
  single_focal_warmup_iters: 0
  z_near: 0.1
  z_far: 100
# Evaluation hooks: a `validation` sub-section and a `visualization`
# sub-section, each with its own subset, step source and trigger events.
# NOTE(review): the source text had all indentation stripped; the nesting below
# is reconstructed from key order — confirm against the original file.
validation:
  validation:
    batch_size: 1
    # Index range 0..512 of the validation data; whether `end` is inclusive is
    # not visible here.
    subset:
      type: range
      args:
        start: 0
        end: 512
    custom_validator: anycam.video_validator.video_validator
    fit_video_config: anycam/configs/eval_cfgs/train_eval_2.yaml
    log_loss: false
    global_step:
      type: trainer iteration
    # Triggered every 5000 completed iterations and once on COMPLETED.
    events:
    - type: ITERATION_COMPLETED
      args:
        every: 5000
    - type: COMPLETED
      args: null
  visualization:
    metrics: []
    subset:
      type: range
      args:
        start: 0
        end: 1
    # Each key has an explicit null value; presumably the key names select what
    # gets rendered — confirm against the visualizer.
    visualize:
      input_imgs: null
      depth: null
      occlusions: null
      rendered_flow: null
      gt_flow: null
      predicted_occlusions: null
      uncertainty: null
    log_loss: false
    global_step:
      type: trainer iteration
    # Triggered every 2500 completed iterations and once on COMPLETED.
    events:
    - type: ITERATION_COMPLETED
      args:
        every: 2500
    - type: COMPLETED
      args: null
# NOTE(review): these two keys look like run-time metadata (launch port and GPU
# model) recorded with the run rather than tunable settings — confirm.
master_port: 2223
# The key contains spaces; it is valid as a plain YAML key and is kept unquoted
# as in the source.
cuda device name: Quadro RTX 8000