Upload folder using huggingface_hub
- config/cris_r50.yaml +58 -0
- config/cris_verbonly_b64_nopos.yaml +74 -0
- config/cris_verbonly_b64_nopos_hn0.03.yaml +74 -0
- config/cris_verbonly_b64_nopos_hn0.1.yaml +74 -0
- config/cris_verbonly_b64_nopos_nf.yaml +74 -0
- config/cris_verbonly_b64_nopos_notarget.yaml +75 -0
- config/hardneg/cris_aclvo_hn_b64_nopos.yaml +80 -0
config/cris_r50.yaml
ADDED
@@ -0,0 +1,58 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 32 # data loader workers
  workers_val: 16
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 64 # batch size for training
  batch_size_val: 64 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # Resume & Save
  metric_mode: 'original'
  metric_learning: False
  exp_name: CRIS_R50
  output_folder: exp/refcocog_u_repro
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: 'latest' # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend
  freeze: True
Distributed:
  dist_url: tcp://localhost:3681
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0
TEST:
  test_split: val-test
  test_lmdb: datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
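
For reference, a config like the one above can be read with plain PyYAML and flattened into a single namespace. This is only a minimal sketch assuming PyYAML; the repository's own config loader is not part of this upload and may differ.

import yaml
from types import SimpleNamespace

def load_cfg(path):
    # Read the YAML file and flatten its top-level sections (DATA, Distributed,
    # TEST, and TRAIN where present) into one flat namespace.
    with open(path) as f:
        raw = yaml.safe_load(f)
    flat = {}
    for key, value in raw.items():
        if isinstance(value, dict):
            flat.update(value)   # section header: merge its keys
        else:
            flat[key] = value    # plain top-level key
    return SimpleNamespace(**flat)

cfg = load_cfg("config/cris_r50.yaml")
print(cfg.dataset, cfg.batch_size, cfg.base_lr)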
config/cris_verbonly_b64_nopos.yaml
ADDED
@@ -0,0 +1,74 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only_rev' # Choice : ['hardpos_only', 'hardpos_only_rev', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1
  hn_prob: 0.0

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
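
The metric-learning block above (loss_option: 'ACL_verbonly' with margin_value and temperature) points at an angular-margin contrastive term over embedding pairs. The loss implementation itself is not included in this upload, so the following is only a hedged sketch of one common formulation, with margin_value read as degrees and one hard positive per anchor; the actual ACL code may differ.

import math
import torch
import torch.nn.functional as F

def angular_margin_contrastive(anchor, positive, negatives,
                               margin_value=10, temperature=0.05):
    # anchor, positive: (B, D) embeddings; negatives: (K, D) embeddings.
    m = math.radians(margin_value)                    # margin_value treated as degrees (assumption)
    a = F.normalize(anchor, dim=-1)
    p = F.normalize(positive, dim=-1)
    n = F.normalize(negatives, dim=-1)
    cos_pos = (a * p).sum(-1).clamp(-1 + 1e-7, 1 - 1e-7)
    pos_logit = torch.cos(torch.acos(cos_pos) + m)    # push the positive pair by the angular margin
    neg_logits = a @ n.t()
    logits = torch.cat([pos_logit.unsqueeze(1), neg_logits], dim=1) / temperature
    labels = torch.zeros(a.size(0), dtype=torch.long, device=a.device)
    # In the total objective this term would be scaled by metric_loss_weight.
    return F.cross_entropy(logits, labels)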
config/cris_verbonly_b64_nopos_hn0.03.yaml
ADDED
@@ -0,0 +1,74 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only_rev' # Choice : ['hardpos_only', 'hardpos_only_rev', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1
  hn_prob: 0.0

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
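
The exclude_multiobj and exclude_pos flags in these configs describe filters on the expressions used for the hard-positive pairs: drop samples with three or more annotated objects, and drop two-object samples whose query is positional. The filtering code is not in this upload, so the sketch below only illustrates the two rules as stated in the comments; nobj and the positional word list are assumptions.

POSITIONAL_WORDS = {"left", "right", "top", "bottom", "front", "behind",
                    "above", "below", "near", "next", "closest", "farthest"}

def keep_for_metric_learning(sentence: str, nobj: int,
                             exclude_multiobj: bool = True,
                             exclude_pos: bool = True) -> bool:
    # Rule 1: exclude multiobj (nobj >= 3)
    if exclude_multiobj and nobj >= 3:
        return False
    # Rule 2: exclude multiobj with a positional query expression (nobj >= 2 && positional query)
    if exclude_pos and nobj >= 2 and any(w in POSITIONAL_WORDS for w in sentence.lower().split()):
        return False
    return True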
config/cris_verbonly_b64_nopos_hn0.1.yaml
ADDED
@@ -0,0 +1,74 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only_rev' # Choice : ['hardpos_only', 'hardpos_only_rev', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1
  hn_prob: 0.0

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
config/cris_verbonly_b64_nopos_nf.yaml
ADDED
@@ -0,0 +1,74 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: False
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only' # Choice : ['hardpos_only', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1
  hn_prob: 0.0

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
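
The _nf variant above differs from the preceding configs mainly in freeze: False (and metric_mode: 'hardpos_only'), i.e. the backbone is left trainable. A minimal sketch of what such a flag usually toggles is shown below; the attribute name model.backbone is an assumption about the model object, not code from this repository.

def apply_freeze_flag(model, freeze: bool):
    # When freeze is True, stop gradients to the (CLIP) backbone parameters;
    # when False, as in the _nf config, leave them trainable.
    for p in model.backbone.parameters():
        p.requires_grad_(not freeze)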
config/cris_verbonly_b64_nopos_notarget.yaml
ADDED
@@ -0,0 +1,75 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only_rev' # Choice : ['hardpos_only', 'hardpos_only_rev', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1
  hn_prob: 0.0
  hn_celoss: False

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
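
The _notarget variant adds hn_celoss: False, which the hardneg config later glosses as "hardneg no target supervision": hard-negative samples should not contribute to the target (segmentation) loss. The training loop is not part of this upload, so the sketch below is just one hedged way such a switch could be applied; pred, target, and is_hard_negative are hypothetical names.

import torch.nn.functional as F

def segmentation_loss(pred, target, is_hard_negative, hn_celoss: bool):
    # pred, target: (B, 1, H, W); is_hard_negative: (B,) bool mask.
    if not hn_celoss:
        keep = ~is_hard_negative                      # drop hard negatives from the target loss
        if keep.sum() == 0:
            return pred.sum() * 0.0                   # nothing left to supervise in this batch
        pred, target = pred[keep], target[keep]
    return F.binary_cross_entropy_with_logits(pred, target)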
config/hardneg/cris_aclvo_hn_b64_nopos.yaml
ADDED
@@ -0,0 +1,80 @@
DATA:
  dataset: refcocog_u
  train_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/train.lmdb
  train_split: train
  val_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  val_split: val
  mask_root: /home/s1/chaeyunkim/VerbCentric_CY/datasets/masks/refcocog_u

TRAIN:
  # Base Arch
  clip_pretrain: pretrain/RN50.pt
  input_size: 416
  word_len: 22
  word_dim: 1024
  vis_dim: 512
  fpn_in: [512, 1024, 1024]
  fpn_out: [256, 512, 1024]
  sync_bn: True
  freeze: True
  # Decoder
  num_layers: 3
  num_head: 8
  dim_ffn: 2048
  dropout: 0.1
  intermediate: False
  # Training Setting
  workers: 4 # data loader workers
  workers_val: 4
  epochs: 50
  milestones: [35]
  start_epoch: 0
  batch_size: 32 # batch size for training
  batch_size_val: 32 # batch size for validation during training, memory and speed tradeoff
  base_lr: 0.0001
  lr_decay: 0.1
  lr_multi: 0.1
  weight_decay: 0.
  max_norm: 0.
  manual_seed: 0
  print_freq: 100
  # metric learning args
  metric_learning: True
  # specific metric learning args
  metric_mode: 'hardpos_only' # Choice : ['hardpos_only', 'both']
  exclude_multiobj: True # exclude multiobj (nobj >= 3)
  exclude_pos: True # exclude multiobj w/ positional query expression (nobj >=2 && positional query)
  loss_option: 'ACL_verbonly' # Choice : ['AML_verbonly', 'AML', 'ACL', 'ACL_verbonly']
  metric_loss_weight: 0.1

  # hard negative related options
  hn_prob: 0.1 # hardneg percentage with regard to hard positive
  acl_hn_weight: 0.1 # hardneg weight in acl loss aggregation

  # hardneg no target supervision
  hn_celoss: False

  # Angular Margin Contrastive Loss argument
  margin_value: 10
  temperature: 0.05

  # Resume & Save
  exp_name: CRIS_AML_verbonly_pos25_b32
  output_folder: exp/refcocog_u/exclude_multiobj
  save_freq: 1
  weight: # path to initial weight (default: none)
  resume: "latest" # path to latest checkpoint (default: none)
  evaluate: True # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend

Distributed:
  dist_url: tcp://localhost:2298
  dist_backend: 'nccl'
  multiprocessing_distributed: True
  world_size: 1
  rank: 0

TEST:
  test_split: val-test
  test_lmdb: /home/s1/chaeyunkim/VerbCentric_CY/datasets/lmdb/refcocog_u/val.lmdb
  visualize: False
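
The hardneg config is the only one that actually enables hard negatives: hn_prob: 0.1 (how often a hard negative is drawn relative to a hard positive) and acl_hn_weight: 0.1 (its weight when the ACL terms are aggregated). How these knobs are consumed is not shown in this upload, so the following is only a hedged sketch of a plausible aggregation; the sampling point, the function names, and any cfg fields beyond those in the YAML above are assumptions.

import random

def aggregate_metric_loss(hardpos_loss, hardneg_loss, cfg):
    # Include the hard-negative term only a fraction of the time (cfg.hn_prob),
    # down-weight it by cfg.acl_hn_weight, and scale the whole metric term by
    # cfg.metric_loss_weight before adding it to the segmentation loss.
    total = hardpos_loss
    if hardneg_loss is not None and random.random() < cfg.hn_prob:
        total = total + cfg.acl_hn_weight * hardneg_loss
    return cfg.metric_loss_weight * total

# Hypothetical use inside a training step:
#   loss = seg_loss + aggregate_metric_loss(pos_term, neg_term, cfg)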