#!/bin/bash
#SBATCH --job-name g2                # Job name
### Logging
#SBATCH --output=%j.out              # Stdout (%j expands to jobId)
#SBATCH --error=%j.err               # Stderr (%j expands to jobId)
### Node info
#SBATCH --nodes=1                    # Single node or multi node
#SBATCH --nodelist=sota-2
#SBATCH --time 24:00:00              # Max time (hh:mm:ss)
#SBATCH --gres=gpu:2                 # GPUs per node
#SBATCH --mem=96G                    # Recommend 32G per GPU
#SBATCH --ntasks-per-node=1          # Tasks per node
#SBATCH --cpus-per-task=16           # Recommend 8 per GPU

# Slurm batch job "g2": holds the allocation for a while, then runs train.py
# followed by val.py from the project directory on one node with 2 GPUs.
#
# Optional environment:
#   HOLD_SECONDS  seconds to sleep before the Python commands (default 86400)

# --- Runtime environment ----------------------------------------------------
export NCCL_DEBUG=INFO
export REQUESTS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt"
# Both spellings are exported because tools differ in which one they read.
export HTTPS_PROXY="https://192.168.0.10:443/"
export https_proxy="https://192.168.0.10:443/"

# Switches exported to the child processes; presumably read by train.py /
# val.py -- their exact meaning is not visible from this script.
export TEST_VAL_TRAIN=False
export TEST_VAL_PRED=True
export WANDB=True

# --- Hold the allocation ----------------------------------------------------
# NOTE(review): the default of 86400 s equals the 24:00:00 --time limit above,
# so with the default the commands below are not expected to start before the
# job hits its time limit. This looks like a deliberate "keep the node for
# interactive debugging" hold -- confirm. Submit with HOLD_SECONDS=0 to go
# straight to training.
sleep "${HOLD_SECONDS:-86400}"

# --- Project directory ------------------------------------------------------
# All paths below are relative, so refuse to continue from the wrong directory.
project_dir="/u/xiuyu/work/dev4"
cd "${project_dir}" || {
  printf 'error: cannot cd to %s\n' "${project_dir}" >&2
  exit 1
}

# Prepend the project root to PYTHONPATH; the ${VAR:+...} form avoids a
# dangling ':' when PYTHONPATH is unset or empty.
py_path=".${PYTHONPATH:+:${PYTHONPATH}}"

# --- Training ---------------------------------------------------------------
PYTHONPATH="${py_path}" python3 train.py \
  --devices 2 \
  --config configs/train/train_scalable_with_state.yaml \
  --save_ckpt_path output/seed_1k_pure_seed_150_3_emb_head_3_debug \
  --pretrain_ckpt output/ours_map_pretrain/epoch=31.ckpt

# --- Validation -------------------------------------------------------------
# Runs regardless of the training exit status (no `set -e`), as before.
# NOTE(review): this loads last.ckpt from ".../seed_1k_pure_seed_150_3_emb_head_3",
# not from the "..._debug" directory written by the training step above, and
# uses `python` rather than `python3` -- confirm both are intended.
PYTHONPATH="${py_path}" python val.py \
  --config configs/validation/val_scalable_with_state.yaml \
  --save_path output/seed_debug \
  --pretrain_ckpt output/seed_1k_pure_seed_150_3_emb_head_3/last.ckpt