import gc
import logging
import os
from typing import Any, Callable, Dict, List, Literal, Tuple, Union

import torch
from torch import nn
from safetensors import safe_open
from safetensors.torch import load_file
from diffusers.models.controlnet import ControlNetModel
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
from diffusers.pipelines.pipeline_utils import DiffusionPipeline

from .convert_from_ckpt import (
    convert_ldm_unet_checkpoint,
    convert_ldm_vae_checkpoint,
    convert_ldm_clip_checkpoint,
)
from .convert_lora_safetensor_to_diffusers import convert_motion_lora_ckpt_to_diffusers

logger = logging.getLogger(__name__)


def update_pipeline_model_parameters(
    pipeline: DiffusionPipeline,
    model_path: str = None,
    lora_dict: Dict[str, Dict] = None,
    text_model_path: str = None,
    device: str = "cuda",
    need_unload: bool = False,
):
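    """Refresh a pipeline's weights from a base checkpoint and/or LoRA files.

    Thin dispatcher: when ``model_path`` is given, the base UNet/VAE/text-encoder
    weights are replaced via ``update_pipeline_basemodel``; when ``lora_dict`` is
    given, the LoRAs are merged in via ``update_pipeline_lora_models``. If
    ``need_unload`` is True, the LoRA unload records are returned alongside the
    pipeline so the merge can later be reverted with ``unload_lora``.
    """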
    # Stays empty if no LoRA is merged but ``need_unload`` is set.
    unload_dict = []
    if model_path is not None:
        pipeline = update_pipeline_basemodel(
            pipeline, model_path, text_sd_model_path=text_model_path, device=device
        )
    if lora_dict is not None:
        pipeline, unload_dict = update_pipeline_lora_models(
            pipeline,
            lora_dict,
            device=device,
            need_unload=need_unload,
        )
    if need_unload:
        return pipeline, unload_dict
    return pipeline


def update_pipeline_basemodel(
    pipeline: DiffusionPipeline,
    model_path: str,
    text_sd_model_path: str,
    device: str = "cuda",
):
    """Update the pipeline's base model weights from ``model_path``.

    Args:
        pipeline (DiffusionPipeline): pipeline whose UNet/VAE/text encoder are updated.
        model_path (str): path to a ``.ckpt`` or ``.safetensors`` base checkpoint.
        text_sd_model_path (str): pretrained CLIP text model used when rebuilding
            the text encoder.
        device (str, optional): device to load tensors onto. Defaults to "cuda".

    Returns:
        DiffusionPipeline: the updated pipeline.
    """
    if model_path.endswith(".ckpt"):
        state_dict = torch.load(model_path, map_location=device)
        pipeline.unet.load_state_dict(state_dict)
        print("update sd_model", model_path)
    elif model_path.endswith(".safetensors"):
        base_state_dict = {}
        with safe_open(model_path, framework="pt", device=device) as f:
            for key in f.keys():
                base_state_dict[key] = f.get_tensor(key)

        is_lora = all("lora" in k for k in base_state_dict.keys())
        assert not is_lora, "Base model cannot be LoRA: {}".format(model_path)

        # vae
        converted_vae_checkpoint = convert_ldm_vae_checkpoint(
            base_state_dict, pipeline.vae.config
        )
        pipeline.vae.load_state_dict(converted_vae_checkpoint)

        # unet
        converted_unet_checkpoint = convert_ldm_unet_checkpoint(
            base_state_dict, pipeline.unet.config
        )
        pipeline.unet.load_state_dict(converted_unet_checkpoint, strict=False)

        # text encoder
        pipeline.text_encoder = convert_ldm_clip_checkpoint(
            base_state_dict, text_sd_model_path
        )
        print("update sd_model", model_path)
    pipeline.to(device)
    return pipeline


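# Per-block LoRA weight presets. Each list has 17 entries: index 0 scales the
# text-encoder update, indices 1-16 scale the updates of the 16 UNet attention
# blocks listed in ``lora_unet_layers`` below (down blocks, mid block, up blocks).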
LORA_BLOCK_WEIGHT_MAP = {
    "FACE": [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
    "DEFACE": [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1],
    "ALL": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    "MIDD": [1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
    "OUTALL": [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
}


def update_pipeline_lora_model(
    pipeline: DiffusionPipeline,
    lora: Union[str, Dict],
    alpha: float = 0.75,
    device: str = "cuda",
    lora_prefix_unet: str = "lora_unet",
    lora_prefix_text_encoder: str = "lora_te",
    lora_unet_layers=[
        "lora_unet_down_blocks_0_attentions_0",
        "lora_unet_down_blocks_0_attentions_1",
        "lora_unet_down_blocks_1_attentions_0",
        "lora_unet_down_blocks_1_attentions_1",
        "lora_unet_down_blocks_2_attentions_0",
        "lora_unet_down_blocks_2_attentions_1",
        "lora_unet_mid_block_attentions_0",
        "lora_unet_up_blocks_1_attentions_0",
        "lora_unet_up_blocks_1_attentions_1",
        "lora_unet_up_blocks_1_attentions_2",
        "lora_unet_up_blocks_2_attentions_0",
        "lora_unet_up_blocks_2_attentions_1",
        "lora_unet_up_blocks_2_attentions_2",
        "lora_unet_up_blocks_3_attentions_0",
        "lora_unet_up_blocks_3_attentions_1",
        "lora_unet_up_blocks_3_attentions_2",
    ],
    lora_block_weight_str: Literal["FACE", "DEFACE", "ALL", "MIDD", "OUTALL"] = "ALL",
    need_unload: bool = False,
):
    """Merge a single LoRA into the pipeline's UNet and text encoder weights.

    Args:
        pipeline (DiffusionPipeline): pipeline whose weights are modified in place.
        lora (Union[str, Dict]): path to a ``.safetensors`` LoRA file, or an
            already loaded LoRA state dict.
        alpha (float, optional): global merge strength. Defaults to 0.75.
        device (str, optional): device to load the LoRA tensors onto. Defaults to "cuda".
        lora_prefix_unet (str, optional): key prefix of UNet entries. Defaults to "lora_unet".
        lora_prefix_text_encoder (str, optional): key prefix of text-encoder entries.
            Defaults to "lora_te".
        lora_unet_layers (list, optional): the 16 UNet attention-block key prefixes
            used to look up per-block weights. Defaults to the SD1.x attention blocks.
        lora_block_weight_str (Literal, optional): preset name in ``LORA_BLOCK_WEIGHT_MAP``.
            Defaults to "ALL".
        need_unload (bool, optional): if True, also return records for ``unload_lora``.
            Defaults to False.

    Returns:
        DiffusionPipeline, or (DiffusionPipeline, list) when ``need_unload`` is True.
    """
    # Resolve the per-block weight preset; None disables per-block scaling.
    lora_block_weight = None
    if lora_block_weight_str is not None:
        lora_block_weight = LORA_BLOCK_WEIGHT_MAP[lora_block_weight_str.upper()]
    if lora_block_weight:
        assert len(lora_block_weight) == 17

    if isinstance(lora, str):
        state_dict = load_file(lora, device=device)
    else:
        for k in lora:
            lora[k] = lora[k].to(device)
        state_dict = lora

    visited = set()
    unload_dict = []

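    # Keys appear to follow the kohya-ss naming scheme, e.g.
    # "lora_unet_down_blocks_0_attentions_0_..._to_q.lora_down.weight" together
    # with a matching "lora_up.weight" and an optional ".alpha" entry. For every
    # down/up pair we locate the target module by walking attribute names, then
    # merge the low-rank product directly into its weight.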
    for key in state_dict:
        if ".alpha" in key or key in visited:
            continue

        if "text" in key:
            layer_infos = (
                key.split(".")[0].split(lora_prefix_text_encoder + "_")[-1].split("_")
            )
            curr_layer = pipeline.text_encoder
        else:
            layer_infos = key.split(".")[0].split(lora_prefix_unet + "_")[-1].split("_")
            curr_layer = pipeline.unet

        temp_name = layer_infos.pop(0)
        while len(layer_infos) > -1:
            try:
                curr_layer = curr_layer.__getattr__(temp_name)
                if len(layer_infos) > 0:
                    temp_name = layer_infos.pop(0)
                elif len(layer_infos) == 0:
                    break
            except Exception:
                if len(temp_name) > 0:
                    temp_name += "_" + layer_infos.pop(0)
                else:
                    temp_name = layer_infos.pop(0)

        pair_keys = []
        if "lora_down" in key:
            pair_keys.append(key.replace("lora_down", "lora_up"))
            pair_keys.append(key)
            alpha_key = key.replace("lora_down.weight", "alpha")
        else:
            pair_keys.append(key)
            pair_keys.append(key.replace("lora_up", "lora_down"))
            alpha_key = key.replace("lora_up.weight", "alpha")

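        # Merge the low-rank update: W += alpha * scale * (up @ down), where
        # scale = stored_alpha / rank if the file provides an ".alpha" entry,
        # else 1.0. 1x1-conv LoRA weights (4-D tensors) are squeezed to 2-D for
        # the matmul and reshaped back; otherwise an einsum handles k x k kernels.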
        if len(state_dict[pair_keys[0]].shape) == 4:
            weight_up = state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
            weight_down = (
                state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
            )
            if alpha_key in state_dict:
                weight_scale = state_dict[alpha_key].item() / weight_up.shape[1]
            else:
                weight_scale = 1.0

            if len(weight_up.shape) == len(weight_down.shape):
                adding_weight = (
                    alpha
                    * weight_scale
                    * torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3)
                )
            else:
                adding_weight = (
                    alpha
                    * weight_scale
                    * torch.einsum("a b, b c h w -> a c h w", weight_up, weight_down)
                )
        else:
            weight_up = state_dict[pair_keys[0]].to(torch.float32)
            weight_down = state_dict[pair_keys[1]].to(torch.float32)
            if alpha_key in state_dict:
                weight_scale = state_dict[alpha_key].item() / weight_up.shape[1]
            else:
                weight_scale = 1.0
            adding_weight = alpha * weight_scale * torch.mm(weight_up, weight_down)
        adding_weight = adding_weight.to(torch.float16)

        if lora_block_weight:
            if "text" in key:
                adding_weight *= lora_block_weight[0]
            else:
                for idx, layer in enumerate(lora_unet_layers):
                    if layer in key:
                        adding_weight *= lora_block_weight[idx + 1]
                        break

        curr_layer_unload_data = {"layer": curr_layer, "added_weight": adding_weight}
        curr_layer.weight.data += adding_weight

        unload_dict.append(curr_layer_unload_data)

        for item in pair_keys:
            visited.add(item)
    if need_unload:
        return pipeline, unload_dict
    else:
        return pipeline


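# Earlier variant of ``update_pipeline_lora_model`` that ignores the per-key
# ".alpha" entries in the LoRA file (no rank-based rescaling); apparently kept
# for reference.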
def update_pipeline_lora_model_old(
    pipeline: DiffusionPipeline,
    lora: Union[str, Dict],
    alpha: float = 0.75,
    device: str = "cuda",
    lora_prefix_unet: str = "lora_unet",
    lora_prefix_text_encoder: str = "lora_te",
    lora_unet_layers=[
        "lora_unet_down_blocks_0_attentions_0",
        "lora_unet_down_blocks_0_attentions_1",
        "lora_unet_down_blocks_1_attentions_0",
        "lora_unet_down_blocks_1_attentions_1",
        "lora_unet_down_blocks_2_attentions_0",
        "lora_unet_down_blocks_2_attentions_1",
        "lora_unet_mid_block_attentions_0",
        "lora_unet_up_blocks_1_attentions_0",
        "lora_unet_up_blocks_1_attentions_1",
        "lora_unet_up_blocks_1_attentions_2",
        "lora_unet_up_blocks_2_attentions_0",
        "lora_unet_up_blocks_2_attentions_1",
        "lora_unet_up_blocks_2_attentions_2",
        "lora_unet_up_blocks_3_attentions_0",
        "lora_unet_up_blocks_3_attentions_1",
        "lora_unet_up_blocks_3_attentions_2",
    ],
    lora_block_weight_str: Literal["FACE", "DEFACE", "ALL", "MIDD", "OUTALL"] = "ALL",
    need_unload: bool = False,
):
    """Merge a single LoRA into the pipeline's UNet and text encoder weights.

    Older implementation: identical to ``update_pipeline_lora_model`` except that
    the per-key ".alpha" scale stored in the LoRA file is not applied.

    Args:
        pipeline (DiffusionPipeline): pipeline whose weights are modified in place.
        lora (Union[str, Dict]): path to a ``.safetensors`` LoRA file, or an
            already loaded LoRA state dict.
        alpha (float, optional): global merge strength. Defaults to 0.75.
        device (str, optional): device to load the LoRA tensors onto. Defaults to "cuda".
        lora_prefix_unet (str, optional): key prefix of UNet entries. Defaults to "lora_unet".
        lora_prefix_text_encoder (str, optional): key prefix of text-encoder entries.
            Defaults to "lora_te".
        lora_unet_layers (list, optional): the 16 UNet attention-block key prefixes
            used to look up per-block weights. Defaults to the SD1.x attention blocks.
        lora_block_weight_str (Literal, optional): preset name in ``LORA_BLOCK_WEIGHT_MAP``.
            Defaults to "ALL".
        need_unload (bool, optional): if True, also return records for ``unload_lora``.
            Defaults to False.

    Returns:
        DiffusionPipeline, or (DiffusionPipeline, list) when ``need_unload`` is True.
    """
    # Resolve the per-block weight preset; None disables per-block scaling.
    lora_block_weight = None
    if lora_block_weight_str is not None:
        lora_block_weight = LORA_BLOCK_WEIGHT_MAP[lora_block_weight_str.upper()]
    if lora_block_weight:
        assert len(lora_block_weight) == 17

    if isinstance(lora, str):
        state_dict = load_file(lora, device=device)
    else:
        for k in lora:
            lora[k] = lora[k].to(device)
        state_dict = lora

    visited = set()
    unload_dict = []

    for key in state_dict:
        if ".alpha" in key or key in visited:
            continue

        if "text" in key:
            layer_infos = (
                key.split(".")[0].split(lora_prefix_text_encoder + "_")[-1].split("_")
            )
            curr_layer = pipeline.text_encoder
        else:
            layer_infos = key.split(".")[0].split(lora_prefix_unet + "_")[-1].split("_")
            curr_layer = pipeline.unet

        temp_name = layer_infos.pop(0)
        while len(layer_infos) > -1:
            try:
                curr_layer = curr_layer.__getattr__(temp_name)
                if len(layer_infos) > 0:
                    temp_name = layer_infos.pop(0)
                elif len(layer_infos) == 0:
                    break
            except Exception:
                if len(temp_name) > 0:
                    temp_name += "_" + layer_infos.pop(0)
                else:
                    temp_name = layer_infos.pop(0)

        pair_keys = []
        if "lora_down" in key:
            pair_keys.append(key.replace("lora_down", "lora_up"))
            pair_keys.append(key)
        else:
            pair_keys.append(key)
            pair_keys.append(key.replace("lora_up", "lora_down"))

        if len(state_dict[pair_keys[0]].shape) == 4:
            weight_up = state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
            weight_down = (
                state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
            )
            adding_weight = alpha * torch.mm(weight_up, weight_down).unsqueeze(
                2
            ).unsqueeze(3)
        else:
            weight_up = state_dict[pair_keys[0]].to(torch.float32)
            weight_down = state_dict[pair_keys[1]].to(torch.float32)
            adding_weight = alpha * torch.mm(weight_up, weight_down)

        if lora_block_weight:
            if "text" in key:
                adding_weight *= lora_block_weight[0]
            else:
                for idx, layer in enumerate(lora_unet_layers):
                    if layer in key:
                        adding_weight *= lora_block_weight[idx + 1]
                        break

        curr_layer_unload_data = {"layer": curr_layer, "added_weight": adding_weight}
        curr_layer.weight.data += adding_weight

        unload_dict.append(curr_layer_unload_data)

        for item in pair_keys:
            visited.add(item)
    if need_unload:
        return pipeline, unload_dict
    else:
        return pipeline


def update_pipeline_lora_models(
    pipeline: DiffusionPipeline,
    lora_dict: Dict[str, Dict],
    device: str = "cuda",
    need_unload: bool = True,
    lora_prefix_unet: str = "lora_unet",
    lora_prefix_text_encoder: str = "lora_te",
    lora_unet_layers=[
        "lora_unet_down_blocks_0_attentions_0",
        "lora_unet_down_blocks_0_attentions_1",
        "lora_unet_down_blocks_1_attentions_0",
        "lora_unet_down_blocks_1_attentions_1",
        "lora_unet_down_blocks_2_attentions_0",
        "lora_unet_down_blocks_2_attentions_1",
        "lora_unet_mid_block_attentions_0",
        "lora_unet_up_blocks_1_attentions_0",
        "lora_unet_up_blocks_1_attentions_1",
        "lora_unet_up_blocks_1_attentions_2",
        "lora_unet_up_blocks_2_attentions_0",
        "lora_unet_up_blocks_2_attentions_1",
        "lora_unet_up_blocks_2_attentions_2",
        "lora_unet_up_blocks_3_attentions_0",
        "lora_unet_up_blocks_3_attentions_1",
        "lora_unet_up_blocks_3_attentions_2",
    ],
):
    """Merge every LoRA in ``lora_dict`` into the pipeline.

    Args:
        pipeline (DiffusionPipeline): pipeline whose weights are modified in place.
        lora_dict (Dict[str, Dict]): maps a LoRA ``.safetensors`` path to its options
            ("strength", "strength_offset", "lora_block_weight").
        device (str, optional): device to load the LoRA tensors onto. Defaults to "cuda".
        need_unload (bool, optional): currently unused; unload records are always
            collected and returned. Defaults to True.
        lora_prefix_unet (str, optional): key prefix of UNet entries. Defaults to "lora_unet".
        lora_prefix_text_encoder (str, optional): key prefix of text-encoder entries.
            Defaults to "lora_te".
        lora_unet_layers (list, optional): the 16 UNet attention-block key prefixes
            used to look up per-block weights. Defaults to the SD1.x attention blocks.

    Returns:
        (DiffusionPipeline, list): the updated pipeline and the concatenated unload records.
    """
    unload_dicts = []
    for lora, value in lora_dict.items():
        lora_name = os.path.basename(lora).replace(".safetensors", "")
        strength_offset = value.get("strength_offset", 0.0)
        alpha = value.get("strength", 1.0)
        alpha += strength_offset
        lora_weight_str = value.get("lora_block_weight", "ALL")
        lora = load_file(lora)
        pipeline, unload_dict = update_pipeline_lora_model(
            pipeline,
            lora=lora,
            device=device,
            alpha=alpha,
            lora_prefix_unet=lora_prefix_unet,
            lora_prefix_text_encoder=lora_prefix_text_encoder,
            lora_unet_layers=lora_unet_layers,
            lora_block_weight_str=lora_weight_str,
            need_unload=True,
        )
        print(
            "Update LoRA {} with alpha {} and weight {}".format(
                lora_name, alpha, lora_weight_str
            )
        )
        unload_dicts += unload_dict
    return pipeline, unload_dicts


def unload_lora(unload_dict: List[Dict[str, nn.Module]]):
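    """Revert LoRA merges by subtracting the recorded added weights in place."""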
    for layer_data in unload_dict:
        layer = layer_data["layer"]
        added_weight = layer_data["added_weight"]
        layer.weight.data -= added_weight

    gc.collect()
    torch.cuda.empty_cache()


def load_motion_lora_weights(
    animation_pipeline,
    motion_module_lora_configs=[],
):
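    """Apply motion-module LoRAs, each given as {"path": ..., "alpha": ...}, to the animation pipeline."""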
    for motion_module_lora_config in motion_module_lora_configs:
        path, alpha = (
            motion_module_lora_config["path"],
            motion_module_lora_config["alpha"],
        )
        print(f"load motion LoRA from {path}")

        motion_lora_state_dict = torch.load(path, map_location="cpu")
        motion_lora_state_dict = (
            motion_lora_state_dict["state_dict"]
            if "state_dict" in motion_lora_state_dict
            else motion_lora_state_dict
        )

        animation_pipeline = convert_motion_lora_ckpt_to_diffusers(
            animation_pipeline, motion_lora_state_dict, alpha
        )

    return animation_pipeline