SkyReels_L

Paused

App Files Community

1inkusFace committed on Mar 6

Commit

9191d3a

verified ·

1 Parent(s): 5568046

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -37

app.py CHANGED Viewed

@@ -31,7 +31,6 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 logger = logging.getLogger(__name__)
 # --- Dummy Classes (Keep for standalone execution) ---
 class OffloadConfig:
     def __init__(
@@ -46,30 +45,24 @@ class OffloadConfig:
         self.compiler_transformer = compiler_transformer
         self.compiler_cache = compiler_cache
 class TaskType:  # Keep here for infer
     T2V = 0
     I2V = 1
 class LlamaModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return LlamaModel()
     def to(self, device):
         return self
 class HunyuanVideoTransformer3DModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return HunyuanVideoTransformer3DModel()
     def to(self, device):
         return self
 class SkyreelsVideoPipeline:
     @staticmethod
     def from_pretrained(*args, **kwargs):
@@ -82,21 +75,17 @@ class SkyreelsVideoPipeline:
         num_frames = kwargs.get("num_frames", 16)  # Default to 16 frames
         height = kwargs.get("height", 512)
         width = kwargs.get("width", 512)
         if "image" in kwargs:  # I2V
             image = kwargs["image"]
             # Convert PIL Image to PyTorch tensor (and normalize to [0, 1])
             image_tensor = torch.from_numpy(np.array(image)).float() / 255.0
             image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)  # (H, W, C) -> (1, C, H, W)
             # Create video by repeating the image
             frames = image_tensor.repeat(1, 1, num_frames, 1, 1)  # (1, C, T, H, W)
             frames = frames + torch.randn_like(frames) * 0.05  # Add a little noise
             # frames = frames.permute(0, 2, 1, 3, 4) # NO PERMUTE HERE
         else:  # T2V
             frames = torch.randn(1, 3, num_frames, height, width)  # Use correct dims: (1, C, T, H, W)
         return type("obj", (object,), {"frames": frames})()  # No longer a list!
     def __init__(self):
@@ -112,18 +101,12 @@ class SkyreelsVideoPipeline:
         def enable_tiling(self):
             pass
 def quantize_(*args, **kwargs):
     return
 def float8_weight_only():
     return
-# --- End Dummy Classes ---
 class SkyReelsVideoSingleGpuInfer:
     def _load_model(
         self, model_id: str, base_model_id: str = "hunyuanvideo-community/HunyuanVideo", quant_model: bool = True
@@ -135,7 +118,6 @@ class SkyReelsVideoSingleGpuInfer:
         transformer = HunyuanVideoTransformer3DModel.from_pretrained(
             model_id, torch_dtype=torch.bfloat16, device="cpu"
         ).to("cpu")
         if quant_model:
             quantize_(text_encoder, float8_weight_only())
             text_encoder.to("cpu")
@@ -143,7 +125,6 @@ class SkyReelsVideoSingleGpuInfer:
             quantize_(transformer, float8_weight_only())
             transformer.to("cpu")
             torch.cuda.empty_cache()
         pipe = SkyreelsVideoPipeline.from_pretrained(
             base_model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.bfloat16
         ).to("cpu")
@@ -174,18 +155,14 @@ class SkyReelsVideoSingleGpuInfer:
         """Initializes the model and moves it to the GPU."""
         if self.is_initialized:
             return
         if not torch.cuda.is_available():
             raise RuntimeError("CUDA is not available. Cannot initialize model.")
         self.gpu_device = "cuda:0"
         self.pipe = self._load_model(model_id=self.model_id, quant_model=self.quant_model)
         if self.is_offload:
             pass  # Offloading logic (if any) would go here
         else:
             self.pipe.to(self.gpu_device)
         if self.offload_config.compiler_transformer:
             torch._dynamo.config.suppress_errors = True
             os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
@@ -200,7 +177,6 @@ class SkyReelsVideoSingleGpuInfer:
     def warm_up(self):
         if not self.is_initialized:
             raise RuntimeError("Model must be initialized before warm-up.")
         init_kwargs = {
             "prompt": "A woman is dancing in a room",
             "height": 544,
@@ -228,10 +204,8 @@ class SkyReelsVideoSingleGpuInfer:
         result = self.pipe(**kwargs).frames  # Return the tensor directly
         return result
 _predictor = None
 @spaces.GPU(duration=90)
 def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict]:
     """Generates a video based on the given prompt and seed.
@@ -245,11 +219,9 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
         A tuple containing the path to the generated video and the parameters used.
     """
     global _predictor
     if seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
     if image is None:
         task_type = TaskType.T2V
         model_id = "Skywork/SkyReels-V1-Hunyuan-T2V"
@@ -279,7 +251,6 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
             "negative_prompt": "Aerial view, low quality, bad hands",
             "cfg_for": False, #Keep if present in the original
         }
     if _predictor is None:
         _predictor = SkyReelsVideoSingleGpuInfer(
             task_type=task_type,
@@ -294,15 +265,12 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
         )
         _predictor.initialize()
         logger.info("Predictor initialized")
     with torch.no_grad():
         output = _predictor.infer(**kwargs) #Removed [0]
     output = (output.numpy() * 255).astype(np.uint8)
-    # CRITICAL CHANGE: Transpose *after* converting to numpy and taking output[0]
-    #output = output.transpose(1, 2, 0, 3)  # (T, H, W, C)
-    print(output.shape)
-    print(output[0].shape)
     save_dir = f"./result"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{seed}.mp4"
@@ -310,7 +278,6 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
     export_to_video(output, video_out_file, fps=24)
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
@@ -330,7 +297,6 @@ def create_gradio_interface():
         )
     return demo
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch()

 logger = logging.getLogger(__name__)
 # --- Dummy Classes (Keep for standalone execution) ---
 class OffloadConfig:
     def __init__(
         self.compiler_transformer = compiler_transformer
         self.compiler_cache = compiler_cache
 class TaskType:  # Keep here for infer
     T2V = 0
     I2V = 1
 class LlamaModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return LlamaModel()
     def to(self, device):
         return self
 class HunyuanVideoTransformer3DModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return HunyuanVideoTransformer3DModel()
     def to(self, device):
         return self
 class SkyreelsVideoPipeline:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         num_frames = kwargs.get("num_frames", 16)  # Default to 16 frames
         height = kwargs.get("height", 512)
         width = kwargs.get("width", 512)
         if "image" in kwargs:  # I2V
             image = kwargs["image"]
             # Convert PIL Image to PyTorch tensor (and normalize to [0, 1])
             image_tensor = torch.from_numpy(np.array(image)).float() / 255.0
             image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)  # (H, W, C) -> (1, C, H, W)
             # Create video by repeating the image
             frames = image_tensor.repeat(1, 1, num_frames, 1, 1)  # (1, C, T, H, W)
             frames = frames + torch.randn_like(frames) * 0.05  # Add a little noise
             # frames = frames.permute(0, 2, 1, 3, 4) # NO PERMUTE HERE
         else:  # T2V
             frames = torch.randn(1, 3, num_frames, height, width)  # Use correct dims: (1, C, T, H, W)
         return type("obj", (object,), {"frames": frames})()  # No longer a list!
     def __init__(self):
         def enable_tiling(self):
             pass
 def quantize_(*args, **kwargs):
     return
 def float8_weight_only():
     return
 class SkyReelsVideoSingleGpuInfer:
     def _load_model(
         self, model_id: str, base_model_id: str = "hunyuanvideo-community/HunyuanVideo", quant_model: bool = True
         transformer = HunyuanVideoTransformer3DModel.from_pretrained(
             model_id, torch_dtype=torch.bfloat16, device="cpu"
         ).to("cpu")
         if quant_model:
             quantize_(text_encoder, float8_weight_only())
             text_encoder.to("cpu")
             quantize_(transformer, float8_weight_only())
             transformer.to("cpu")
             torch.cuda.empty_cache()
         pipe = SkyreelsVideoPipeline.from_pretrained(
             base_model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.bfloat16
         ).to("cpu")
         """Initializes the model and moves it to the GPU."""
         if self.is_initialized:
             return
         if not torch.cuda.is_available():
             raise RuntimeError("CUDA is not available. Cannot initialize model.")
         self.gpu_device = "cuda:0"
         self.pipe = self._load_model(model_id=self.model_id, quant_model=self.quant_model)
         if self.is_offload:
             pass  # Offloading logic (if any) would go here
         else:
             self.pipe.to(self.gpu_device)
         if self.offload_config.compiler_transformer:
             torch._dynamo.config.suppress_errors = True
             os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
     def warm_up(self):
         if not self.is_initialized:
             raise RuntimeError("Model must be initialized before warm-up.")
         init_kwargs = {
             "prompt": "A woman is dancing in a room",
             "height": 544,
         result = self.pipe(**kwargs).frames  # Return the tensor directly
         return result
 _predictor = None
 @spaces.GPU(duration=90)
 def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict]:
     """Generates a video based on the given prompt and seed.
         A tuple containing the path to the generated video and the parameters used.
     """
     global _predictor
     if seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
     if image is None:
         task_type = TaskType.T2V
         model_id = "Skywork/SkyReels-V1-Hunyuan-T2V"
             "negative_prompt": "Aerial view, low quality, bad hands",
             "cfg_for": False, #Keep if present in the original
         }
     if _predictor is None:
         _predictor = SkyReelsVideoSingleGpuInfer(
             task_type=task_type,
         )
         _predictor.initialize()
         logger.info("Predictor initialized")
     with torch.no_grad():
         output = _predictor.infer(**kwargs) #Removed [0]
     output = (output.numpy() * 255).astype(np.uint8)
+    # Correct Transpose: (1, C, T, H, W) -> (1, T, H, W, C)
+    output = output.transpose(0, 2, 3, 4, 1)
+    output = output[0]  # Remove batch dimension: (T, H, W, C)
     save_dir = f"./result"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{seed}.mp4"
     export_to_video(output, video_out_file, fps=24)
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
         )
     return demo
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch()