#!/usr/bin/env bash
#
# Launch the pytest suite as TESTMODULOTOTAL parallel background processes.
#
# Each process is started with its own TESTMODULO index, GRADIO_SERVER_PORT,
# CUDA_VISIBLE_DEVICES and H2OGPT_BASE_PATH, and writes its combined
# stdout/stderr to testsparallel<mod>.log in the current directory.
# NOTE(review): presumably the test suite reads TESTMODULO/TESTMODULOTOTAL to
# pick which tests belong to a shard -- confirm against the tests' conftest.
#
# Deliberately no `set -e`: the cleanup commands below are best-effort and are
# expected to fail when there is nothing to stop or kill.

# CHOOSE:
ngpus=4
# below has to match GPUs for A6000s due to long context tests
export TESTMODULOTOTAL=4

pip install pytest-instafail || true

# Best-effort cleanup of leftovers from a previous run.
# -r: do not invoke `docker stop` at all when no container matched.
docker ps | awk '/text-generation-inference/ {print $1}' | xargs -r docker stop
killall -s SIGINT pytest
killall -s SIGTERM pytest
killall -s 9 pytest
pkill --signal 9 -f weaviate-embedded/weaviate

# `lscpu -p` prints one CSV row per logical CPU after '#'-prefixed header lines.
# Unique (core,socket,node) field combinations are counted as physical cores.
NPHYSICAL=$(lscpu -p | grep -Ev '^#' | sort -u -t, -k 2,4 | wc -l)
NPROCS=$(lscpu -p | grep -Ev '^#' | wc -l)

# Threads available to each shard. Clamp to at least 1 so a machine with fewer
# logical CPUs than shards never exports a thread count of 0.
n_jobs=$((NPROCS / TESTMODULOTOTAL))
if ((n_jobs < 1)); then
  n_jobs=1
fi
echo "CORES: $NPHYSICAL $NPROCS $n_jobs"

# Per-process thread limits are identical for every shard, so export them once.
export n_jobs
export OMP_NUM_THREADS=$n_jobs
export NUMEXPR_MAX_THREADS=$n_jobs
export OPENBLAS_NUM_THREADS=$n_jobs
# By default, OpenBLAS will restrict the Cpus_allowed to be 0x1.
export OPENBLAS_MAIN_FREE=$n_jobs
export MKL_NUM_THREADS=$n_jobs

# GENERAL:
lowergpuid=0
low=0
high=$((TESTMODULOTOTAL - 1))
pids=""
for ((mod = low; mod <= high; mod++)); do
  # in some cases launch gradio server, TGI server, or gradio server as
  # inference server with +1 and +2 off base port
  # ports always increment by 3
  export GRADIO_SERVER_PORT=$((7860 + mod * 3))
  export TESTMODULO=$mod
  # CVD loops over number of GPUs
  export CUDA_VISIBLE_DEVICES=$((lowergpuid + mod % ngpus))
  export H2OGPT_BASE_PATH="./base_$mod"
  # huggyllama test uses a lot of memory, requires TESTMODULOTOTAL=ngpus for even A6000s
  # pytest --instafail -s -v -n 1 tests -k "not test_huggyllama_transformers_pr" &> "testsparallel${mod}.log" &
  pytest --instafail -s -v -n 1 tests &> "testsparallel${mod}.log" &
  pid=$!
  echo "MODS: $mod $GRADIO_SERVER_PORT $CUDA_VISIBLE_DEVICES $H2OGPT_BASE_PATH"
  pids="$pids $pid"
done

# On Ctrl-C, kill every shard that was launched and exit non-zero.
# $pids is intentionally expanded now: the list is complete at this point.
# shellcheck disable=SC2064
trap "kill $pids; exit 1" INT

echo "to check on results while running, do:"
echo "grep -a PASSED testsparallel*.log | sed 's/.*PASSED//g' | sort | uniq |wc -l"
echo "grep -a FAILED testsparallel*.log | sed 's/.*FAILED//g' | sort | uniq |wc -l"
echo "to interrupt but still get some results, do:"
#echo "ps -auxwf | grep -v "[g]rep" | grep pytest | awk '{print $2}' |xargs kill -s SIGINT"
echo "kill -s SIGINT $pids"

# Block until all shards finish. A bare `wait` returns 0 regardless of the
# shards' exit codes, so pass/fail must be read from the log files.
wait