#!/bin/bash
# Distributed inference launcher for Wan 2.1 T2V via torchrun.
# NOTE: the shebang must be the very first bytes of the file — the leading
# blank lines previously made the kernel ignore it when run as ./script.sh.

# Fail fast on errors (-e), fail a pipeline if any stage fails (pipefail),
# and trace every command for debugging (-x).
set -e -x -o pipefail
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Disable Weights & Biases logging for this run (makes the `--report_to
# "wandb"` flag passed later effectively a no-op).
export WANDB_MODE="disabled"

# Disable NCCL peer-to-peer and InfiniBand transports.
# NOTE(review): presumably needed because P2P/IB are unavailable or flaky on
# this host — confirm; these hurt multi-GPU throughput when not required.
export NCCL_P2P_DISABLE=1

export NCCL_IB_DISABLE=1

# Turn off torch's NCCL monitoring/watchdog thread.
export TORCH_NCCL_ENABLE_MONITORING=0

# Maximum verbosity from the finetrainers library.
export FINETRAINERS_LOG_LEVEL="DEBUG"
|
|
|
|
|
|
|
|
# Fetch the validation split once; later runs reuse the local copy.
VALIDATION_DATASET_DIR="examples/inference/datasets/openvid-1k-split-validation"
if [ -d "$VALIDATION_DATASET_DIR" ]; then
    echo "Validation dataset already exists. Skipping download."
else
    echo "Downloading validation dataset..."
    huggingface-cli download --repo-type dataset finetrainers/OpenVid-1k-split-validation --local-dir "$VALIDATION_DATASET_DIR"
fi
|
|
|
|
|
# --- Hardware / parallelism configuration -----------------------------------
BACKEND="ptd"                  # parallelism backend passed to the trainer
NUM_GPUS=4                     # processes launched per node by torchrun
CUDA_VISIBLE_DEVICES="0,1,2,3" # GPUs exposed to those processes

# Prompt dataset consumed by the inference script.
DATASET_FILE="examples/inference/wan/dummy_text_to_video.json"

# Parallelism presets. These are bash arrays (not whitespace-split strings):
# each flag/value stays a single argv token, so no reliance on unquoted word
# splitting (ShellCheck SC2086). Only one preset is selected below; the rest
# are kept as ready-made alternatives.
DDP_1=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 1 --tp_degree 1)
DDP_2=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 2 --dp_shards 1 --cp_degree 1 --tp_degree 1)
DDP_4=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 4 --dp_shards 1 --cp_degree 1 --tp_degree 1)
DDP_8=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 8 --dp_shards 1 --cp_degree 1 --tp_degree 1)
CP_2=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 2 --tp_degree 1)
CP_4=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 4 --tp_degree 1)

# Preset used for this run: 4-way context parallelism (matches NUM_GPUS).
parallel_cmd=(
    "${CP_4[@]}"
)
|
|
|
|
|
|
|
|
# Model under test: Wan 2.1 T2V 1.3B in Diffusers layout.
# NOTE(review): --enable_slicing/--enable_tiling presumably toggle VAE
# slicing/tiling to reduce peak memory — confirm against inference.py.
model_cmd=(
    --model_name "wan"
    --pretrained_model_name_or_path "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
    --enable_slicing
    --enable_tiling
)
|
|
|
|
|
|
|
|
# Task definition: text-to-video over the prompts listed in $DATASET_FILE.
inference_cmd=(
    --inference_type "text_to_video"
    --dataset_file "$DATASET_FILE"
)
|
|
|
|
|
|
|
|
# Attention kernel backend ("sage" — presumably SageAttention; verify the
# accepted values in inference.py's argument parser).
attn_provider_cmd=(
    --attn_provider "sage"
)
|
|
|
|
|
|
|
|
# Global torch numeric settings: allow TF32 matmuls and request "high"
# float32 matmul precision (speed over strict fp32 accuracy).
torch_config_cmd=(
    --allow_tf32
    --float32_matmul_precision "high"
)
|
|
|
|
|
|
|
|
# Run bookkeeping: determinism, tracking, output location, and distributed
# init/NCCL timeouts (seconds).
miscellaneous_cmd=(
    --seed 31337
    --tracker_name "finetrainers-inference"
    --output_dir "/raid/aryan/wan-inference"
    --init_timeout 600
    --nccl_timeout 600
    # NOTE(review): WANDB_MODE=disabled is exported at the top of this script,
    # which likely makes this reporting a no-op — confirm intent.
    --report_to "wandb"
)
|
|
|
|
|
|
|
|
# Re-export so the GPU selection is visible to torchrun's child processes.
export CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES"

# Single-node launch, one process per GPU.
# NOTE(review): with --standalone, recent torchrun versions set up their own
# c10d rendezvous and may ignore/warn about the explicit --rdzv_backend and
# --rdzv_endpoint flags below — confirm they are still needed.
torchrun \
  --standalone \
  --nnodes=1 \
  --nproc_per_node="$NUM_GPUS" \
  --rdzv_backend c10d \
  --rdzv_endpoint="localhost:19242" \
  examples/inference/inference.py \
  "${parallel_cmd[@]}" \
  "${model_cmd[@]}" \
  "${inference_cmd[@]}" \
  "${attn_provider_cmd[@]}" \
  "${torch_config_cmd[@]}" \
  "${miscellaneous_cmd[@]}"

# printf has predictable escape handling; `echo -ne` is non-portable.
printf '%s\n\n' "-------------------- Finished executing script --------------------"
|
|
|