# Source: ohicarip/animateanyone — model_config.yaml
# Commit b25bc3f ("add model", ohicarip, about 1 year ago); file size 2.71 kB.

	model:
	base_learning_rate: 1.0e-6
	target: refnet.models.animator.Animator
	params:
	linear_start: 0.00085
	linear_end: 0.0120
	num_timesteps_cond: 1
	log_every_t: 200
	timesteps: 1000
	first_stage_key: image
	cond_stage_key: reference
	control_key: control
	image_size: 64
	channels: 4
	cond_stage_trainable: false
	conditioning_key: inject
	monitor: val/loss_simple_ema
	scale_factor: 0.18215
	use_ema: true
	ucg_rate: 0.1
	use_token: true
	offset_noise_level: 0.05
	is_first_stage: False

	unet_config:
	target: refnet.modules.unet.DualCondUNet
	params:
	image_size: 32 # unused
	in_channels: 4
	c_channels: 3
	out_channels: 4
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_head_channels: 64
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False
	use_temporal: true
	frames_t: 16

	refnet_config:
	target: refnet.modules.unet.ReferenceNet
	params:
	image_size: 32 # unused
	in_channels: 4
	out_channels: 4
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_head_channels: 64
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False

	first_stage_config:
	target: ldm.models.autoencoder.AutoencoderKL
	params:
	embed_dim: 4
	monitor: val/rec_loss
	ddconfig:
	double_z: true
	z_channels: 4
	resolution: 512
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult: [1, 2, 4, 4]
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	lossconfig:
	target: torch.nn.Identity
	is_first_stage: False

	cond_stage_config:
	target: refnet.modules.encoders.FrozenOpenCLIPImageEmbedder
	params:
	arch: ViT-L-14
	output_tokens: true
	is_first_stage: False

	dataloader:
	class: AnimateLoader
	params:
	transform_list:
	flip: true
	rotate: false
	resize: true
	jitter: False
	rotate_range: 45
	refset_key: reference # only use deformation training in ColorizeDiffusion v2
	load_size: 576
	crop_size: 512 # crop images to (crop_size, crop_size), randomly crop images when crop_size < load_size
	keep_ratio: false
	inverse_grayscale: true
	is_first_stage: False
	shuffle: true