{ "exp_id": "vidcab_ret_m", "debug": false, "llm_model": "gpt2-medium", "llm_8bit": false, "freeze_lm": false, "lora": false, "vis_model": "openai/clip-vit-base-patch32", "precision": "bf16", "n_visual_tokens": 1, "rand_init": false, "num_layers": -1, "hidden_dim": 1152, "nheads": 8, "dim_feedforward": 2048, "dropout": 0.1, "droppath": 0.1, "vis_pooling": false, "vis_query_pooling": false, "last_vis_mean": false, "vocab_model": "google/siglip-so400m-patch14-384", "dataset_dir": "/blob/v-lqinghong/data/Ego_database", "log_base_dir": "/blob/v-lqinghong/experiments/VLog", "dataset": "ret", "metadata": "egoclip_vidcab", "fullset": false, "val_dataset": "ret", "val_metadata": "egoclip_vidcab", "workers": 16, "visual_input": "feature", "image_size": 224, "num_frame": 1, "add_special_tokens": false, "num_history": 0, "past_len": 0, "train_narrator": "narration_pass_2", "add_eos": true, "max_len": 128, "max_len_eval": 128, "max_clip_len": 128, "temperature": 0.7, "epochs": 10, "start_epoch": 0, "steps_per_epoch": -1, "val_steps_per_epoch": -1, "batch_size": 32, "val_batch_size": 32, "print_freq": 1, "resume": "", "evaluate": false, "only_best": true, "do_sample": false, "lock_lm": false, "lr": 0.0003, "beta1": 0.9, "beta2": 0.95, "weight_decay": 0.0, "lr_warmup_steps": 100, "lr_schedule_step_size": 10, "lr_schedule_gamma": 0.1, "grad_accumulation_steps": 1, "grad_clip": 1.0, "loss": "nce", "nce_temperature": 0.05, "scale_lm_loss": 1.0, "scale_ret_loss": 1.0, "train_class": false, "main_node": true, "world_size": -1, "local_rank": -1, "dist_url": "tcp://localhost:44122", "dist_backend": "nccl", "seed": null, "gpu": null, "multiprocessing_distributed": false, "log_dir": "/blob/v-lqinghong/experiments/VLog/vidcab_ret_m/2025-03-08_12-48-28" }