File size: 3,066 Bytes
ebf5d87 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
#!/usr/bin/env bash
# Launch training of the mixture_embedding_experts baseline
# (baselines/mixture_embedding_experts/train.py).
# Run at project root dir: every path below is relative to it.
#
# Usage:
#   bash <this_script> DSET_NAME CTX_MODE VID_FEAT_TYPE [ANY_OTHER_PYTHON_ARGS...]
# Example (script location assumed from where train.py lives -- TODO confirm):
#   bash baselines/mixture_embedding_experts/scripts/train.sh tvr video_sub resnet_i3d
#
# Arguments:
#   $1  dset_name      dataset name; only "tvr" is handled (see case below)
#   $2  ctx_mode       ["video", "sub", "tef", "video_sub", "video_tef",
#                      "sub_tef", "video_sub_tef"]; any value containing "sub"
#                      enables the subtitle features
#   $3  vid_feat_type  [resnet, i3d, resnet_i3d, none], none for subtitles only
#                      models. Any value other than resnet/i3d/resnet_i3d
#                      leaves the ResNet path and size defaults in place.
#   $4+ forwarded verbatim to train.py
#
# Exit status: 1 on an unsupported dataset/ctx_mode combination, otherwise the
# exit status of the python process.

dset_name=${1:-}
ctx_mode=${2:-}
vid_feat_type=${3:-}

feature_root=data/tvr_feature_release
results_root=baselines/mixture_embedding_experts/results
vid_feat_size=2048
extra_args=()

# A ctx_mode such as "sub", "video_sub" or "sub_tef" requests subtitle features.
use_sub=false
if [[ ${ctx_mode} == *sub* ]]; then
  use_sub=true
fi

if [[ ${use_sub} == true && ${dset_name} != "tvr" ]]; then
  echo "The use of subtitles is only supported in tvr." >&2
  exit 1
fi

case "${dset_name}" in
  tvr)
    train_path=data/tvr_train_release.jsonl
    corpus_path=data/tvr_video2dur_idx.json
    desc_bert_path=${feature_root}/bert_feature/query_only/tvr_query_pretrained_w_query.h5
    # Default video feature (ResNet); overridden below depending on vid_feat_type.
    vid_feat_path=${feature_root}/video_feature/tvr_resnet152_rgb_max_cl-1.5.h5
    eval_split_name=val
    extra_args+=(--eval_path data/tvr_val_release.jsonl)

    case "${vid_feat_type}" in
      i3d)
        echo "Using I3D feature with shape 1024"
        vid_feat_path=${feature_root}/video_feature/tvr_i3d_rgb600_avg_cl-1.5.h5
        vid_feat_size=1024
        ;;
      resnet)
        echo "Using ResNet feature with shape 2048"
        vid_feat_path=${feature_root}/video_feature/tvr_resnet152_rgb_max_cl-1.5.h5
        vid_feat_size=2048
        ;;
      resnet_i3d)
        echo "Using concatenated ResNet and I3D feature with shape 2048+1024"
        vid_feat_path=${feature_root}/video_feature/tvr_resnet152_rgb_max_i3d_rgb600_avg_cat_cl-1.5.h5
        vid_feat_size=3072
        extra_args+=(--no_norm_vfeat) # since they are already normalized.
        ;;
      # Anything else (e.g. "none") keeps the ResNet defaults set above.
    esac

    if [[ ${use_sub} == true ]]; then
      echo "Running with sub."
      # Overwrite the query features with the ones matching the subtitle setup.
      desc_bert_path=${feature_root}/bert_feature/sub_query/tvr_query_pretrained_w_sub_query.h5
      sub_bert_path=${feature_root}/bert_feature/sub_query/tvr_sub_pretrained_w_sub_query_max_cl-1.5.h5
      sub_feat_size=768
      extra_args+=(--sub_feat_size "${sub_feat_size}")
      extra_args+=(--sub_bert_path "${sub_bert_path}")
    fi
    ;;
  *)
    # Fail fast: without a known dataset none of the data paths are defined,
    # so launching train.py would only pass it empty arguments.
    echo "Unknown argument: dataset name [${dset_name}] (supported: tvr)" >&2
    echo "Usage: bash $0 DSET_NAME CTX_MODE VID_FEAT_TYPE [ANY_OTHER_PYTHON_ARGS...]" >&2
    exit 1
    ;;
esac

echo "Start training with dataset [${dset_name}] in Context Mode [${ctx_mode}]"
echo "Extra args ${extra_args[*]}"

# Quote every expansion so that values (and forwarded user arguments) containing
# spaces or glob characters reach train.py as single, unmodified words.
python baselines/mixture_embedding_experts/train.py \
  --dset_name="${dset_name}" \
  --eval_split_name="${eval_split_name}" \
  --results_root="${results_root}" \
  --train_path="${train_path}" \
  --desc_bert_path="${desc_bert_path}" \
  --corpus_path="${corpus_path}" \
  --vid_feat_path="${vid_feat_path}" \
  --vid_feat_size="${vid_feat_size}" \
  --ctx_mode="${ctx_mode}" \
  "${extra_args[@]}" \
  "${@:4}"
|