Spaces:

jmanhype
/

MuseV

Runtime error

MuseV / musev /schedulers /scheduling_lcm.py

jmanhype

Initial commit without binary files

06e9d12 3 months ago

12.6 kB

	# Copyright 2023 Stanford University Team and The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion
	# and https://github.com/hojonathanho/diffusion
	from __future__ import annotations

	import math
	from dataclasses import dataclass
	from typing import List, Optional, Tuple, Union

	import numpy as np
	import torch
	from numpy import ndarray

	from diffusers.configuration_utils import ConfigMixin, register_to_config
	from diffusers.utils import BaseOutput, logging
	from diffusers.utils.torch_utils import randn_tensor
	from diffusers.schedulers.scheduling_utils import SchedulerMixin
	from diffusers.schedulers.scheduling_lcm import (
	LCMSchedulerOutput,
	betas_for_alpha_bar,
	rescale_zero_terminal_snr,
	LCMScheduler as DiffusersLCMScheduler,
	)
	from ..utils.noise_util import video_fusion_noise

	logger = logging.get_logger(__name__) # pylint: disable=invalid-name


	class LCMScheduler(DiffusersLCMScheduler):
	    """LCM scheduler whose `step` can inject Gaussian or video-fusion noise.

	    Subclasses the diffusers `LCMScheduler`. `__init__` forwards every
	    argument unchanged to the parent constructor; `step` adds the
	    `noise_type` / `w_ind_noise` options; `step_bk` is the Gaussian-only
	    variant of `step`.
	    """

	    def __init__(
	        self,
	        num_train_timesteps: int = 1000,
	        beta_start: float = 0.00085,
	        beta_end: float = 0.012,
	        beta_schedule: str = "scaled_linear",
	        trained_betas: ndarray | List[float] | None = None,
	        original_inference_steps: int = 50,
	        clip_sample: bool = False,
	        clip_sample_range: float = 1,
	        set_alpha_to_one: bool = True,
	        steps_offset: int = 0,
	        prediction_type: str = "epsilon",
	        thresholding: bool = False,
	        dynamic_thresholding_ratio: float = 0.995,
	        sample_max_value: float = 1,
	        timestep_spacing: str = "leading",
	        timestep_scaling: float = 10,
	        rescale_betas_zero_snr: bool = False,
	    ):
	        """Forward all configuration values to the diffusers `LCMScheduler`.

	        The arguments are passed through untouched; see the parent class for
	        the meaning of each one.
	        """
	        # Keyword arguments keep every value bound to the right parameter even
	        # if the parent signature is reordered or extended.
	        super().__init__(
	            num_train_timesteps=num_train_timesteps,
	            beta_start=beta_start,
	            beta_end=beta_end,
	            beta_schedule=beta_schedule,
	            trained_betas=trained_betas,
	            original_inference_steps=original_inference_steps,
	            clip_sample=clip_sample,
	            clip_sample_range=clip_sample_range,
	            set_alpha_to_one=set_alpha_to_one,
	            steps_offset=steps_offset,
	            prediction_type=prediction_type,
	            thresholding=thresholding,
	            dynamic_thresholding_ratio=dynamic_thresholding_ratio,
	            sample_max_value=sample_max_value,
	            timestep_spacing=timestep_spacing,
	            timestep_scaling=timestep_scaling,
	            rescale_betas_zero_snr=rescale_betas_zero_snr,
	        )

	    def _denoise_current_step(
	        self,
	        model_output: torch.FloatTensor,
	        timestep: int,
	        sample: torch.FloatTensor,
	    ) -> Tuple[torch.FloatTensor, torch.Tensor, torch.Tensor]:
	        """Run the part of a step shared by `step` and `step_bk`.

	        Validates the scheduler state, initialises the step index on first
	        use, predicts x_0 from the model output and applies the
	        boundary-condition scalings.

	        Args:
	            model_output (`torch.FloatTensor`):
	                The direct output from learned diffusion model.
	            timestep (`int`):
	                The current discrete timestep in the diffusion chain.
	            sample (`torch.FloatTensor`):
	                A current instance of a sample created by the diffusion process.

	        Returns:
	            `tuple`: `(denoised, alpha_prod_t_prev, beta_prod_t_prev)`, i.e. the
	            denoised sample and the two coefficients the caller needs to
	            re-noise it for the next timestep.

	        Raises:
	            ValueError: If `set_timesteps` has not been called, or if
	                `config.prediction_type` is not supported.
	        """
	        if self.num_inference_steps is None:
	            raise ValueError(
	                "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
	            )

	        if self.step_index is None:
	            self._init_step_index(timestep)

	        # 1. get previous step value
	        prev_step_index = self.step_index + 1
	        if prev_step_index < len(self.timesteps):
	            prev_timestep = self.timesteps[prev_step_index]
	        else:
	            # Last entry of the schedule: there is no later timestep to look up.
	            prev_timestep = timestep

	        # 2. compute alphas, betas
	        alpha_prod_t = self.alphas_cumprod[timestep]
	        alpha_prod_t_prev = (
	            self.alphas_cumprod[prev_timestep]
	            if prev_timestep >= 0
	            else self.final_alpha_cumprod
	        )

	        beta_prod_t = 1 - alpha_prod_t
	        beta_prod_t_prev = 1 - alpha_prod_t_prev

	        # 3. Get scalings for boundary conditions
	        c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep)

	        # 4. Compute the predicted original sample x_0 based on the model parameterization
	        if self.config.prediction_type == "epsilon":  # noise-prediction
	            predicted_original_sample = (
	                sample - beta_prod_t.sqrt() * model_output
	            ) / alpha_prod_t.sqrt()
	        elif self.config.prediction_type == "sample":  # x-prediction
	            predicted_original_sample = model_output
	        elif self.config.prediction_type == "v_prediction":  # v-prediction
	            predicted_original_sample = (
	                alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output
	            )
	        else:
	            raise ValueError(
	                f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
	                " `v_prediction` for `LCMScheduler`."
	            )

	        # 5. Clip or threshold "predicted x_0"
	        if self.config.thresholding:
	            predicted_original_sample = self._threshold_sample(
	                predicted_original_sample
	            )
	        elif self.config.clip_sample:
	            predicted_original_sample = predicted_original_sample.clamp(
	                -self.config.clip_sample_range, self.config.clip_sample_range
	            )

	        # 6. Denoise model output using boundary conditions
	        denoised = c_out * predicted_original_sample + c_skip * sample

	        return denoised, alpha_prod_t_prev, beta_prod_t_prev

	    def step(
	        self,
	        model_output: torch.FloatTensor,
	        timestep: int,
	        sample: torch.FloatTensor,
	        generator: Optional[torch.Generator] = None,
	        return_dict: bool = True,
	        w_ind_noise: float = 0.5,
	        noise_type: str = "random",
	    ) -> Union[LCMSchedulerOutput, Tuple]:
	        """
	        Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
	        process from the learned model outputs (most often the predicted noise).

	        Args:
	            model_output (`torch.FloatTensor`):
	                The direct output from learned diffusion model.
	            timestep (`int`):
	                The current discrete timestep in the diffusion chain.
	            sample (`torch.FloatTensor`):
	                A current instance of a sample created by the diffusion process.
	            generator (`torch.Generator`, optional):
	                A random number generator.
	            return_dict (`bool`, optional, defaults to `True`):
	                Whether or not to return a [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] or `tuple`.
	            w_ind_noise (`float`, optional, defaults to 0.5):
	                Forwarded as `w_ind_noise` to `video_fusion_noise`; only used when `noise_type` is
	                `"video_fusion"`. Presumably the weight of the per-frame independent noise component -
	                TODO confirm against `musev.utils.noise_util`.
	            noise_type (`str`, optional, defaults to `"random"`):
	                How the noise injected between steps is drawn: `"random"` uses `randn_tensor`,
	                `"video_fusion"` uses `video_fusion_noise`.
	        Returns:
	            [`~schedulers.scheduling_utils.LCMSchedulerOutput`] or `tuple`:
	                If return_dict is `True`, [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] is returned, otherwise a
	                tuple `(prev_sample, denoised)` is returned where the first element is the sample tensor.
	        Raises:
	            ValueError: If `set_timesteps` has not been called, if `config.prediction_type` is not supported,
	                or if noise has to be drawn and `noise_type` is neither `"random"` nor `"video_fusion"`.
	        """
	        # Steps 1-6 (validation, x_0 prediction, boundary-condition denoising).
	        denoised, alpha_prod_t_prev, beta_prod_t_prev = self._denoise_current_step(
	            model_output, timestep, sample
	        )

	        # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
	        # Noise is not used on the final timestep of the timestep schedule.
	        # This also means that noise is not used for one-step sampling.
	        if self.step_index != self.num_inference_steps - 1:
	            if noise_type == "random":
	                noise = randn_tensor(
	                    model_output.shape,
	                    dtype=model_output.dtype,
	                    device=model_output.device,
	                    generator=generator,
	                )
	            elif noise_type == "video_fusion":
	                noise = video_fusion_noise(
	                    model_output, w_ind_noise=w_ind_noise, generator=generator
	                )
	            else:
	                # Without this branch `noise` would be unbound below and the
	                # caller would only see an opaque UnboundLocalError.
	                raise ValueError(
	                    f"noise_type given as {noise_type} must be one of `random` or `video_fusion`."
	                )
	            prev_sample = (
	                alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
	            )
	        else:
	            prev_sample = denoised

	        # upon completion increase step index by one
	        self._step_index += 1

	        if not return_dict:
	            return (prev_sample, denoised)

	        return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised)

	    def step_bk(
	        self,
	        model_output: torch.FloatTensor,
	        timestep: int,
	        sample: torch.FloatTensor,
	        generator: Optional[torch.Generator] = None,
	        return_dict: bool = True,
	    ) -> Union[LCMSchedulerOutput, Tuple]:
	        """
	        Gaussian-only variant of `step`: predict the sample from the previous timestep by reversing the SDE,
	        always drawing the injected noise with `randn_tensor`.

	        Args:
	            model_output (`torch.FloatTensor`):
	                The direct output from learned diffusion model.
	            timestep (`int`):
	                The current discrete timestep in the diffusion chain.
	            sample (`torch.FloatTensor`):
	                A current instance of a sample created by the diffusion process.
	            generator (`torch.Generator`, optional):
	                A random number generator.
	            return_dict (`bool`, optional, defaults to `True`):
	                Whether or not to return a [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] or `tuple`.
	        Returns:
	            [`~schedulers.scheduling_utils.LCMSchedulerOutput`] or `tuple`:
	                If return_dict is `True`, [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] is returned, otherwise a
	                tuple `(prev_sample, denoised)` is returned where the first element is the sample tensor.
	        Raises:
	            ValueError: If `set_timesteps` has not been called or if `config.prediction_type` is not supported.
	        """
	        # Steps 1-6 (validation, x_0 prediction, boundary-condition denoising).
	        denoised, alpha_prod_t_prev, beta_prod_t_prev = self._denoise_current_step(
	            model_output, timestep, sample
	        )

	        # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference
	        # Noise is not used on the final timestep of the timestep schedule.
	        # This also means that noise is not used for one-step sampling.
	        if self.step_index != self.num_inference_steps - 1:
	            noise = randn_tensor(
	                model_output.shape,
	                generator=generator,
	                device=model_output.device,
	                dtype=denoised.dtype,
	            )
	            prev_sample = (
	                alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise
	            )
	        else:
	            prev_sample = denoised

	        # upon completion increase step index by one
	        self._step_index += 1

	        if not return_dict:
	            return (prev_sample, denoised)

	        return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised)