File size: 1,486 Bytes
c1a7f73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
#!/bin/bash
# Slurm batch job: requests 4 GPUs on node sota-1, optionally holds the
# allocation, then runs train.py followed by val.py from /u/xiuyu/work/dev4.
#
# Optional environment:
#   HOLD_SECONDS  Seconds to sleep before any work starts. Defaults to
#                 604800, the value that used to be hard-coded. Set it to 0
#                 in the submitting environment to start training at once.
#
# Exit status: 0 when both train.py and val.py succeed; otherwise the status
# of the last step that failed, or 1 if the working directory is unavailable.
#SBATCH --job-name g4 # Job name
### Logging
#SBATCH --output=%j.out # Stdout (%j expands to jobId)
#SBATCH --error=%j.err # Stderr (%j expands to jobId)
### Node info
#SBATCH --nodes=1 # Single node or multi node
#SBATCH --nodelist=sota-1
#SBATCH --time 72:00:00 # Max time (hh:mm:ss)
#SBATCH --gres=gpu:4 # GPUs per node
#SBATCH --mem=128G # Recommend 32G per GPU
#SBATCH --ntasks-per-node=1 # Tasks per node
#SBATCH --cpus-per-task=32 # Recommend 8 per GPU

### Network: CA bundle and HTTPS proxy for outbound requests.
export REQUESTS_CA_BUNDLE="/etc/ssl/certs/ca-certificates.crt"
# Both spellings are exported because tools differ in which one they read.
export HTTPS_PROXY="https://192.168.0.10:443/"
export https_proxy="https://192.168.0.10:443/"

### Run switches. They are not read anywhere in this script; presumably the
### Python entry points consume them -- TODO confirm.
export TEST_VAL_TRAIN=0
export TEST_VAL_PRED=1
export WANDB=1

### Optional hold before training.
# NOTE(review): the default of 604800 s (7 days) is longer than the 72:00:00
# limit requested above, so with the default the job is expected to reach its
# time limit before train.py ever starts. Submit with HOLD_SECONDS=0 to train.
hold_seconds="${HOLD_SECONDS:-604800}"
if [[ "${hold_seconds}" != "0" ]]; then
  printf 'Holding allocation for %s s before training (HOLD_SECONDS=0 skips this)\n' \
    "${hold_seconds}" >&2
  sleep "${hold_seconds}"
fi

### Working directory: every path below is relative to it, so stop if it is
### missing instead of running the Python commands from the wrong place.
cd /u/xiuyu/work/dev4 || {
  printf 'error: cannot cd to /u/xiuyu/work/dev4\n' >&2
  exit 1
}

# Put the project root first on the module search path. The ${VAR:+...} form
# avoids a dangling ':' when PYTHONPATH was previously unset or empty.
export PYTHONPATH=".${PYTHONPATH:+:${PYTHONPATH}}"

# Remember failures so the job's exit status reflects them, while still
# running validation after a failed training step (as the script always did).
job_status=0

### Training
python3 train.py \
  --devices 4 \
  --config configs/train/train_scalable_with_state.yaml \
  --save_ckpt_path output/seq_1k_10_150_3_3_encode_occ_separate_offsets \
  --pretrain_ckpt output/pretrain_scalable_map/epoch=31.ckpt ||
  job_status=$?

### Validation
# NOTE(review): this loads a checkpoint from a different run directory
# (seq_5k_..._long) than the one training writes to (seq_1k_...) -- confirm
# that is intended.
# NOTE(review): invoked as `python` while training uses `python3`; left as-is
# because the active environment is not visible from this script.
python val.py \
  --config configs/ours_long_term.yaml \
  --ckpt_path output/seq_5k_10_150_3_3_encode_occ_separate_offsets_bs8_128_no_seqindex_long/epoch=31.ckpt ||
  job_status=$?

exit "${job_status}"