RuntimeError: The size of tensor a (7) must match the size of tensor b (16) at non-singleton dimension 1
#1 by DsnTgr - opened
The following error happens when I run the HunyuanVideo-I2V example:
RuntimeError Traceback (most recent call last)
Cell In[1], line 22
17 prompt = "A man with short gray hair plays a red electric guitar."
18 image = load_image(
19 "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
20 )
---> 22 output = pipe(image=image, prompt=prompt).frames[0]
23 export_to_video(output, "HunyuanVideo-I2V-diffusers.mp4", fps=15)
File ~/miniconda3/envs/ai-train/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/miniconda3/envs/ai-train/lib/python3.10/site-packages/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py:733, in HunyuanVideoImageToVideoPipeline.__call__(self, image, prompt, prompt_2, negative_prompt, negative_prompt_2, height, width, num_frames, num_inference_steps, sigmas, true_cfg_scale, guidance_scale, num_videos_per_prompt, generator, latents, prompt_embeds, pooled_prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_pooled_prompt_embeds, negative_prompt_attention_mask, output_type, return_dict, attention_kwargs, callback_on_step_end, callback_on_step_end_tensor_inputs, prompt_template, max_sequence_length)
731 image_tensor = self.video_processor.preprocess(image, height, width).to(device, vae_dtype)
732 num_channels_latents = (self.transformer.config.in_channels - 1) // 2
--> 733 latents, image_latents = self.prepare_latents(
734 image_tensor,
735 batch_size * num_videos_per_prompt,
736 num_channels_latents,
737 height,
738 width,
739 num_frames,
740 torch.float32,
741 device,
742 generator,
743 latents,
744 )
745 image_latents[:, :, 1:] = 0
746 mask = image_latents.new_ones(image_latents.shape[0], 1, *image_latents.shape[2:])
File ~/miniconda3/envs/ai-train/lib/python3.10/site-packages/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py:514, in HunyuanVideoImageToVideoPipeline.prepare_latents(self, image, batch_size, num_channels_latents, height, width, num_frames, dtype, device, generator, latents)
511 latents = latents.to(device=device, dtype=dtype)
513 t = torch.tensor([0.999]).to(device=device)
--> 514 latents = latents * t + image_latents * (1 - t)
516 return latents, image_latents
RuntimeError: The size of tensor a (7) must match the size of tensor b (16) at non-singleton dimension 1
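For reference, here is a self-contained version of the cell in the traceback, following the standard diffusers HunyuanVideo-I2V example. Only lines 17-23 of the cell are visible above, so the model ID (`hunyuanvideo-community/HunyuanVideo-I2V`), the dtype choices, and the tiling call are assumptions:

```python
import torch
from diffusers import HunyuanVideoImageToVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video, load_image

# Assumed model ID (not shown in the traceback); this must be the I2V
# checkpoint, not the text-to-video one.
model_id = "hunyuanvideo-community/HunyuanVideo-I2V"
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    model_id, subfolder="transformer", torch_dtype=torch.bfloat16
)
pipe = HunyuanVideoImageToVideoPipeline.from_pretrained(
    model_id, transformer=transformer, torch_dtype=torch.float16
)
pipe.vae.enable_tiling()  # reduces VAE memory use; optional
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "HunyuanVideo-I2V-diffusers.mp4", fps=15)
```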
Have you installed diffusers from the branch in this PR? https://github.com/huggingface/diffusers/pull/11066
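If not, pip can install a PR branch directly, e.g. `pip install git+https://github.com/huggingface/diffusers.git@refs/pull/11066/head` (GitHub exposes every PR under `refs/pull/<number>/head`).

As for what the error itself means: the failing line in `prepare_latents` lerps the initial noise toward the encoded image, and the two tensors disagree at dimension 1, the latent channel axis. Given `num_channels_latents = (self.transformer.config.in_channels - 1) // 2` from the traceback, a transformer config with `in_channels = 16` (the text-to-video layout) yields `(16 - 1) // 2 = 7` noise channels, while the VAE encodes 16-channel `image_latents`. That is consistent with running the I2V pipeline against a transformer checkpoint or diffusers build that predates I2V support. A minimal sketch of the broadcasting failure, with hypothetical shapes:

```python
import torch

# Hypothetical latent shapes: (batch, channels, frames, height, width).
latents = torch.randn(1, 7, 9, 68, 120)         # noise: (16 - 1) // 2 = 7 channels
image_latents = torch.randn(1, 16, 9, 68, 120)  # VAE output: 16 channels
t = torch.tensor([0.999])

# Broadcasting cannot reconcile 7 vs 16 at a non-singleton dimension, so this
# raises: RuntimeError: The size of tensor a (7) must match the size of
# tensor b (16) at non-singleton dimension 1
latents = latents * t + image_latents * (1 - t)
```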