Update hy3dgen/shapegen/pipelines.py

hy3dgen/shapegen/pipelines.py  CHANGED  (+169 -34)
@@ -34,11 +34,12 @@ import trimesh
 import yaml
 from PIL import Image
 from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils.import_utils import is_accelerate_version, is_accelerate_available
 from tqdm import tqdm

 from .models.autoencoders import ShapeVAE
 from .models.autoencoders import SurfaceExtractors
-from .utils import logger, synchronize_timer
+from .utils import logger, synchronize_timer, smart_load_model


 def retrieve_timesteps(
@@ -137,6 +138,9 @@ def instantiate_from_config(config, **kwargs):


 class Hunyuan3DDiTPipeline:
+    model_cpu_offload_seq = "conditioner->model->vae"
+    _exclude_from_cpu_offload = []
+
     @classmethod
     @synchronize_timer('Hunyuan3DDiTPipeline Model Loading')
     def from_single_file(
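The two class attributes added to `Hunyuan3DDiTPipeline` follow the `diffusers` convention: `model_cpu_offload_seq` lists the pipeline's components in the order they run during inference, and `_exclude_from_cpu_offload` names components that should be left alone. A minimal sketch of how the sequence string is interpreted (the actual hook installation lives in `enable_model_cpu_offload`, added later in this diff):

# Illustration only: the offload order comes from splitting the sequence string.
model_cpu_offload_seq = "conditioner->model->vae"
print(model_cpu_offload_seq.split("->"))  # ['conditioner', 'model', 'vae']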
@@ -217,34 +221,12 @@ class Hunyuan3DDiTPipeline:
             dtype=dtype,
             device=device,
         )
-
-
-
-
-
-
-            logger.info('Model path not exists, try to download from huggingface')
-            try:
-                import huggingface_hub
-                # download from huggingface
-                path = huggingface_hub.snapshot_download(repo_id=original_model_path)
-                model_path = os.path.join(path, subfolder)
-            except ImportError:
-                logger.warning(
-                    "You need to install HuggingFace Hub to load models from the hub."
-                )
-                raise RuntimeError(f"Model path {model_path} not found")
-            except Exception as e:
-                raise e
-
-        if not os.path.exists(model_path):
-            raise FileNotFoundError(f"Model path {original_model_path} not found")
-
-        extension = 'ckpt' if not use_safetensors else 'safetensors'
-        variant = '' if variant is None else f'.{variant}'
-        ckpt_name = f'model{variant}.{extension}'
-        config_path = os.path.join(model_path, 'config.yaml')
-        ckpt_path = os.path.join(model_path, ckpt_name)
+        config_path, ckpt_path = smart_load_model(
+            model_path,
+            subfolder=subfolder,
+            use_safetensors=use_safetensors,
+            variant=variant
+        )
         return cls.from_single_file(
             ckpt_path,
             config_path,
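With the manual download logic removed, `from_pretrained` delegates path resolution to `smart_load_model`, which, as the call site shows, returns the `config.yaml` and checkpoint paths for a repo id or local directory before handing off to `from_single_file`. A minimal usage sketch, assuming the class is imported from `hy3dgen.shapegen.pipelines` and that `tencent/Hunyuan3D-2` (the repo id used in the VAE mappings below) is the target model:

import torch
from hy3dgen.shapegen.pipelines import Hunyuan3DDiTPipeline

# smart_load_model resolves the checkpoint (local cache or Hugging Face Hub),
# then the pipeline is built from the resolved config.yaml and weights.
pipeline = Hunyuan3DDiTPipeline.from_pretrained('tencent/Hunyuan3D-2', dtype=torch.float16)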
@@ -278,17 +260,170 @@
         self.model = torch.compile(self.model)
         self.conditioner = torch.compile(self.conditioner)

+    def enable_flashvdm(
+        self,
+        enabled: bool = True,
+        adaptive_kv_selection=True,
+        topk_mode='mean',
+        mc_algo='dmc',
+        replace_vae=True,
+    ):
+        if enabled:
+            model_path = self.kwargs['from_pretrained_kwargs']['model_path']
+            turbo_vae_mapping = {
+                'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'),
+                'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0-turbo'),
+                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini-turbo'),
+            }
+            model_name = model_path.split('/')[-1]
+            if replace_vae and model_name in turbo_vae_mapping:
+                model_path, subfolder = turbo_vae_mapping[model_name]
+                self.vae = ShapeVAE.from_pretrained(
+                    model_path, subfolder=subfolder,
+                    use_safetensors=self.kwargs['from_pretrained_kwargs']['use_safetensors'],
+                    device=self.device,
+                )
+            self.vae.enable_flashvdm_decoder(
+                enabled=enabled,
+                adaptive_kv_selection=adaptive_kv_selection,
+                topk_mode=topk_mode,
+                mc_algo=mc_algo
+            )
+        else:
+            model_path = self.kwargs['from_pretrained_kwargs']['model_path']
+            vae_mapping = {
+                'Hunyuan3D-2': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'),
+                'Hunyuan3D-2mv': ('tencent/Hunyuan3D-2', 'hunyuan3d-vae-v2-0'),
+                'Hunyuan3D-2mini': ('tencent/Hunyuan3D-2mini', 'hunyuan3d-vae-v2-mini'),
+            }
+            model_name = model_path.split('/')[-1]
+            if model_name in vae_mapping:
+                model_path, subfolder = vae_mapping[model_name]
+                self.vae = ShapeVAE.from_pretrained(model_path, subfolder=subfolder)
+            self.vae.enable_flashvdm_decoder(enabled=False)
+
     def to(self, device=None, dtype=None):
-        if device is not None:
-            self.device = torch.device(device)
-            self.vae.to(device)
-            self.model.to(device)
-            self.conditioner.to(device)
         if dtype is not None:
             self.dtype = dtype
             self.vae.to(dtype=dtype)
             self.model.to(dtype=dtype)
             self.conditioner.to(dtype=dtype)
+        if device is not None:
+            self.device = torch.device(device)
+            self.vae.to(device)
+            self.model.to(device)
+            self.conditioner.to(device)
+
+    @property
+    def _execution_device(self):
+        r"""
+        Returns the device on which the pipeline's models will be executed. After calling
+        [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from
+        Accelerate's module hooks.
+        """
+        for name, model in self.components.items():
+            if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload:
+                continue
+
+            if not hasattr(model, "_hf_hook"):
+                return self.device
+            for module in model.modules():
+                if (
+                    hasattr(module, "_hf_hook")
+                    and hasattr(module._hf_hook, "execution_device")
+                    and module._hf_hook.execution_device is not None
+                ):
+                    return torch.device(module._hf_hook.execution_device)
+        return self.device
+
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
+        r"""
+        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
+        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+
+        Arguments:
+            gpu_id (`int`, *optional*):
+                The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
+            device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
+                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
+                default to "cuda".
+        """
+        if self.model_cpu_offload_seq is None:
+            raise ValueError(
+                "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set."
+            )
+
+        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+            from accelerate import cpu_offload_with_hook
+        else:
+            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+        torch_device = torch.device(device)
+        device_index = torch_device.index
+
+        if gpu_id is not None and device_index is not None:
+            raise ValueError(
+                f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
+                f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
+            )
+
+        # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
+        self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0)
+
+        device_type = torch_device.type
+        device = torch.device(f"{device_type}:{self._offload_gpu_id}")
+
+        if self.device.type != "cpu":
+            self.to("cpu")
+            device_mod = getattr(torch, self.device.type, None)
+            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
+                device_mod.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+
+        all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)}
+
+        self._all_hooks = []
+        hook = None
+        for model_str in self.model_cpu_offload_seq.split("->"):
+            model = all_model_components.pop(model_str, None)
+            if not isinstance(model, torch.nn.Module):
+                continue
+
+            _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
+            self._all_hooks.append(hook)
+
+        # CPU offload models that are not in the seq chain unless they are explicitly excluded
+        # these models will stay on CPU until maybe_free_model_hooks is called
+        # some models cannot be in the seq chain because they are iteratively called, such as controlnet
+        for name, model in all_model_components.items():
+            if not isinstance(model, torch.nn.Module):
+                continue
+
+            if name in self._exclude_from_cpu_offload:
+                model.to(device)
+            else:
+                _, hook = cpu_offload_with_hook(model, device)
+                self._all_hooks.append(hook)
+
+    def maybe_free_model_hooks(self):
+        r"""
+        Function that offloads all components, removes all model hooks that were added when using
+        `enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
+        is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
+        functions correctly when applying enable_model_cpu_offload.
+        """
+        if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
+            # `enable_model_cpu_offload` has not be called, so silently do nothing
+            return
+
+        for hook in self._all_hooks:
+            # offload model and remove hook from model
+            hook.offload()
+            hook.remove()
+
+        # make sure the model is in the same state as before calling it
+        self.enable_model_cpu_offload()

     @synchronize_timer('Encode cond')
     def encode_cond(self, image, additional_cond_inputs, do_classifier_free_guidance, dual_guidance):
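The new `enable_flashvdm` method swaps the pipeline's VAE for the matching turbo variant (when `replace_vae=True` and the model family is recognized) and toggles the FlashVDM decoder on the VAE; calling it with `enabled=False` restores the standard VAE and decoder. A usage sketch with the defaults from the diff, assuming `pipeline` was loaded as in the earlier sketch:

# Turn on FlashVDM decoding (adaptive KV selection, 'mean' top-k mode, 'dmc' surface extraction).
pipeline.enable_flashvdm(enabled=True, topk_mode='mean', mc_algo='dmc')

# Turn it off again and fall back to the regular VAE decoder.
pipeline.enable_flashvdm(enabled=False)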
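`to` now applies the dtype cast before the device move; both can still be passed in one call. A small usage sketch:

# Cast the conditioner, diffusion model and VAE to half precision, then move them to the GPU.
pipeline.to(device='cuda', dtype=torch.float16)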
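`enable_model_cpu_offload` ports the `diffusers` model-level offload to this pipeline: each component named in `model_cpu_offload_seq` is wrapped with an accelerate hook that moves it to the accelerator when its forward is called and offloads it again when the next component in the sequence runs, so at most one of the three submodules needs to sit in GPU memory at a time. A usage sketch, assuming `accelerate>=0.17.0` is installed:

# Keep only the active submodule (conditioner, then model, then vae) on the GPU.
pipeline.enable_model_cpu_offload()           # defaults to cuda:0
# Or pin the hooks to a specific accelerator:
pipeline.enable_model_cpu_offload(gpu_id=1)   # equivalent to device 'cuda:1'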
+
@property
|
| 318 |
+
def _execution_device(self):
|
| 319 |
+
r"""
|
| 320 |
+
Returns the device on which the pipeline's models will be executed. After calling
|
| 321 |
+
[`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from
|
| 322 |
+
Accelerate's module hooks.
|
| 323 |
+
"""
|
| 324 |
+
for name, model in self.components.items():
|
| 325 |
+
if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload:
|
| 326 |
+
continue
|
| 327 |
+
|
| 328 |
+
if not hasattr(model, "_hf_hook"):
|
| 329 |
+
return self.device
|
| 330 |
+
for module in model.modules():
|
| 331 |
+
if (
|
| 332 |
+
hasattr(module, "_hf_hook")
|
| 333 |
+
and hasattr(module._hf_hook, "execution_device")
|
| 334 |
+
and module._hf_hook.execution_device is not None
|
| 335 |
+
):
|
| 336 |
+
return torch.device(module._hf_hook.execution_device)
|
| 337 |
+
return self.device
|
| 338 |
+
|
| 339 |
+
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
|
| 340 |
+
r"""
|
| 341 |
+
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
| 342 |
+
to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
|
| 343 |
+
method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
|
| 344 |
+
`enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
|
| 345 |
+
|
| 346 |
+
Arguments:
|
| 347 |
+
gpu_id (`int`, *optional*):
|
| 348 |
+
The ID of the accelerator that shall be used in inference. If not specified, it will default to 0.
|
| 349 |
+
device (`torch.Device` or `str`, *optional*, defaults to "cuda"):
|
| 350 |
+
The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
|
| 351 |
+
default to "cuda".
|
| 352 |
+
"""
|
| 353 |
+
if self.model_cpu_offload_seq is None:
|
| 354 |
+
raise ValueError(
|
| 355 |
+
"Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set."
|
| 356 |
+
)
|
| 357 |
+
|
| 358 |
+
if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
|
| 359 |
+
from accelerate import cpu_offload_with_hook
|
| 360 |
+
else:
|
| 361 |
+
raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
|
| 362 |
+
|
| 363 |
+
torch_device = torch.device(device)
|
| 364 |
+
device_index = torch_device.index
|
| 365 |
+
|
| 366 |
+
if gpu_id is not None and device_index is not None:
|
| 367 |
+
raise ValueError(
|
| 368 |
+
f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
|
| 369 |
+
f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
|
| 370 |
+
)
|
| 371 |
+
|
| 372 |
+
# _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0
|
| 373 |
+
self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0)
|
| 374 |
+
|
| 375 |
+
device_type = torch_device.type
|
| 376 |
+
device = torch.device(f"{device_type}:{self._offload_gpu_id}")
|
| 377 |
+
|
| 378 |
+
if self.device.type != "cpu":
|
| 379 |
+
self.to("cpu")
|
| 380 |
+
device_mod = getattr(torch, self.device.type, None)
|
| 381 |
+
if hasattr(device_mod, "empty_cache") and device_mod.is_available():
|
| 382 |
+
device_mod.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
|
| 383 |
+
|
| 384 |
+
all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)}
|
| 385 |
+
|
| 386 |
+
self._all_hooks = []
|
| 387 |
+
hook = None
|
| 388 |
+
for model_str in self.model_cpu_offload_seq.split("->"):
|
| 389 |
+
model = all_model_components.pop(model_str, None)
|
| 390 |
+
if not isinstance(model, torch.nn.Module):
|
| 391 |
+
continue
|
| 392 |
+
|
| 393 |
+
_, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)
|
| 394 |
+
self._all_hooks.append(hook)
|
| 395 |
+
|
| 396 |
+
# CPU offload models that are not in the seq chain unless they are explicitly excluded
|
| 397 |
+
# these models will stay on CPU until maybe_free_model_hooks is called
|
| 398 |
+
# some models cannot be in the seq chain because they are iteratively called, such as controlnet
|
| 399 |
+
for name, model in all_model_components.items():
|
| 400 |
+
if not isinstance(model, torch.nn.Module):
|
| 401 |
+
continue
|
| 402 |
+
|
| 403 |
+
if name in self._exclude_from_cpu_offload:
|
| 404 |
+
model.to(device)
|
| 405 |
+
else:
|
| 406 |
+
_, hook = cpu_offload_with_hook(model, device)
|
| 407 |
+
self._all_hooks.append(hook)
|
| 408 |
+
|
| 409 |
+
def maybe_free_model_hooks(self):
|
| 410 |
+
r"""
|
| 411 |
+
Function that offloads all components, removes all model hooks that were added when using
|
| 412 |
+
`enable_model_cpu_offload` and then applies them again. In case the model has not been offloaded this function
|
| 413 |
+
is a no-op. Make sure to add this function to the end of the `__call__` function of your pipeline so that it
|
| 414 |
+
functions correctly when applying enable_model_cpu_offload.
|
| 415 |
+
"""
|
| 416 |
+
if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0:
|
| 417 |
+
# `enable_model_cpu_offload` has not be called, so silently do nothing
|
| 418 |
+
return
|
| 419 |
+
|
| 420 |
+
for hook in self._all_hooks:
|
| 421 |
+
# offload model and remove hook from model
|
| 422 |
+
hook.offload()
|
| 423 |
+
hook.remove()
|
| 424 |
+
|
| 425 |
+
# make sure the model is in the same state as before calling it
|
| 426 |
+
self.enable_model_cpu_offload()
|
| 427 |
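`maybe_free_model_hooks` is intended to run at the end of the pipeline's `__call__`: it offloads every hooked component, removes the hooks, and re-installs them so the next call starts from a clean CPU state; when offloading was never enabled it simply returns. A usage sketch:

# Safe to call unconditionally after generation; it is a no-op unless
# enable_model_cpu_offload() has been used.
pipeline.maybe_free_model_hooks()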