SkyReels_L

Paused

App Files Files Community

1inkusFace committed on Mar 6

Commit

b113647

verified ·

1 Parent(s): e6ed20e

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -12

app.py CHANGED Viewed

@@ -8,12 +8,7 @@ import subprocess
 from PIL import Image
 import numpy as np
-# subprocess.run(['sh', './sky.sh'])  # Removed as it's likely environment-specific
-# sys.path.append("./SkyReels-V1") # Removed as it's likely environment-specific
-# from skyreelsinfer import TaskType  # Dummy classes cover this
-# from skyreelsinfer.offload import OffloadConfig # Dummy classes cover this
-# from skyreelsinfer.skyreels_video_infer import SkyReelsVideoSingleGpuInfer # Dummy classes cover this
 from diffusers.utils import export_to_video
 import torch
@@ -31,6 +26,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 logger = logging.getLogger(__name__)
 # --- Dummy Classes (Keep for standalone execution) ---
 class OffloadConfig:
     def __init__(
@@ -45,24 +41,30 @@ class OffloadConfig:
         self.compiler_transformer = compiler_transformer
         self.compiler_cache = compiler_cache
 class TaskType:  # Keep here for infer
     T2V = 0
     I2V = 1
 class LlamaModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return LlamaModel()
     def to(self, device):
         return self
 class HunyuanVideoTransformer3DModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return HunyuanVideoTransformer3DModel()
     def to(self, device):
         return self
 class SkyreelsVideoPipeline:
     @staticmethod
     def from_pretrained(*args, **kwargs):
@@ -75,17 +77,21 @@ class SkyreelsVideoPipeline:
         num_frames = kwargs.get("num_frames", 16)  # Default to 16 frames
         height = kwargs.get("height", 512)
         width = kwargs.get("width", 512)
         if "image" in kwargs:  # I2V
             image = kwargs["image"]
             # Convert PIL Image to PyTorch tensor (and normalize to [0, 1])
             image_tensor = torch.from_numpy(np.array(image)).float() / 255.0
             image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)  # (H, W, C) -> (1, C, H, W)
             # Create video by repeating the image
             frames = image_tensor.repeat(1, 1, num_frames, 1, 1)  # (1, C, T, H, W)
             frames = frames + torch.randn_like(frames) * 0.05  # Add a little noise
-            # frames = frames.permute(0, 2, 1, 3, 4) # NO PERMUTE HERE
         else:  # T2V
-            frames = torch.randn(1, 3, num_frames, height, width)  # Use correct dims: (1, C, T, H, W)
         return type("obj", (object,), {"frames": frames})()  # No longer a list!
     def __init__(self):
@@ -101,12 +107,18 @@ class SkyreelsVideoPipeline:
         def enable_tiling(self):
             pass
 def quantize_(*args, **kwargs):
     return
 def float8_weight_only():
     return
 class SkyReelsVideoSingleGpuInfer:
     def _load_model(
         self, model_id: str, base_model_id: str = "hunyuanvideo-community/HunyuanVideo", quant_model: bool = True
@@ -118,6 +130,7 @@ class SkyReelsVideoSingleGpuInfer:
         transformer = HunyuanVideoTransformer3DModel.from_pretrained(
             model_id, torch_dtype=torch.bfloat16, device="cpu"
         ).to("cpu")
         if quant_model:
             quantize_(text_encoder, float8_weight_only())
             text_encoder.to("cpu")
@@ -125,6 +138,7 @@ class SkyReelsVideoSingleGpuInfer:
             quantize_(transformer, float8_weight_only())
             transformer.to("cpu")
             torch.cuda.empty_cache()
         pipe = SkyreelsVideoPipeline.from_pretrained(
             base_model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.bfloat16
         ).to("cpu")
@@ -155,14 +169,18 @@ class SkyReelsVideoSingleGpuInfer:
         """Initializes the model and moves it to the GPU."""
         if self.is_initialized:
             return
         if not torch.cuda.is_available():
             raise RuntimeError("CUDA is not available. Cannot initialize model.")
         self.gpu_device = "cuda:0"
         self.pipe = self._load_model(model_id=self.model_id, quant_model=self.quant_model)
         if self.is_offload:
-            pass  # Offloading logic (if any) would go here
         else:
             self.pipe.to(self.gpu_device)
         if self.offload_config.compiler_transformer:
             torch._dynamo.config.suppress_errors = True
             os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
@@ -177,6 +195,7 @@ class SkyReelsVideoSingleGpuInfer:
     def warm_up(self):
         if not self.is_initialized:
             raise RuntimeError("Model must be initialized before warm-up.")
         init_kwargs = {
             "prompt": "A woman is dancing in a room",
             "height": 544,
@@ -204,8 +223,10 @@ class SkyReelsVideoSingleGpuInfer:
         result = self.pipe(**kwargs).frames  # Return the tensor directly
         return result
 _predictor = None
 @spaces.GPU(duration=90)
 def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict]:
     """Generates a video based on the given prompt and seed.
@@ -219,9 +240,11 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
         A tuple containing the path to the generated video and the parameters used.
     """
     global _predictor
     if seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
     if image is None:
         task_type = TaskType.T2V
         model_id = "Skywork/SkyReels-V1-Hunyuan-T2V"
@@ -249,8 +272,9 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
             "guidance_scale": 6.0,
             "embedded_guidance_scale": 1.0,
             "negative_prompt": "Aerial view, low quality, bad hands",
-            "cfg_for": False, #Keep if present in the original
         }
     if _predictor is None:
         _predictor = SkyReelsVideoSingleGpuInfer(
             task_type=task_type,
@@ -265,12 +289,16 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
         )
         _predictor.initialize()
         logger.info("Predictor initialized")
     with torch.no_grad():
-        output = _predictor.infer(**kwargs) #Removed [0]
     output = (output.numpy() * 255).astype(np.uint8)
     # Correct Transpose: (1, C, T, H, W) -> (1, T, H, W, C)
     output = output.transpose(0, 2, 3, 4, 1)
-    #output = output[0]  # Remove batch dimension: (T, H, W, C)
     save_dir = f"./result"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{seed}.mp4"
@@ -278,6 +306,7 @@ def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict
     export_to_video(output, video_out_file, fps=24)
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
@@ -297,6 +326,7 @@ def create_gradio_interface():
         )
     return demo
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch()

 from PIL import Image
 import numpy as np
+# Removed environment-specific lines
 from diffusers.utils import export_to_video
 import torch
 logger = logging.getLogger(__name__)
 # --- Dummy Classes (Keep for standalone execution) ---
 class OffloadConfig:
     def __init__(
         self.compiler_transformer = compiler_transformer
         self.compiler_cache = compiler_cache
 class TaskType:  # Keep here for infer
     T2V = 0
     I2V = 1
 class LlamaModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return LlamaModel()
     def to(self, device):
         return self
 class HunyuanVideoTransformer3DModel:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         return HunyuanVideoTransformer3DModel()
     def to(self, device):
         return self
 class SkyreelsVideoPipeline:
     @staticmethod
     def from_pretrained(*args, **kwargs):
         num_frames = kwargs.get("num_frames", 16)  # Default to 16 frames
         height = kwargs.get("height", 512)
         width = kwargs.get("width", 512)
         if "image" in kwargs:  # I2V
             image = kwargs["image"]
             # Convert PIL Image to PyTorch tensor (and normalize to [0, 1])
             image_tensor = torch.from_numpy(np.array(image)).float() / 255.0
             image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)  # (H, W, C) -> (1, C, H, W)
             # Create video by repeating the image
             frames = image_tensor.repeat(1, 1, num_frames, 1, 1)  # (1, C, T, H, W)
             frames = frames + torch.randn_like(frames) * 0.05  # Add a little noise
+            # Correct shape: (1, C, T, H, W) - NO PERMUTE HERE
         else:  # T2V
+            frames = torch.randn(1, 3, num_frames, height, width)  # (1, C, T, H, W) - Correct!
         return type("obj", (object,), {"frames": frames})()  # No longer a list!
     def __init__(self):
         def enable_tiling(self):
             pass
 def quantize_(*args, **kwargs):
     return
 def float8_weight_only():
     return
+# --- End Dummy Classes ---
 class SkyReelsVideoSingleGpuInfer:
     def _load_model(
         self, model_id: str, base_model_id: str = "hunyuanvideo-community/HunyuanVideo", quant_model: bool = True
         transformer = HunyuanVideoTransformer3DModel.from_pretrained(
             model_id, torch_dtype=torch.bfloat16, device="cpu"
         ).to("cpu")
         if quant_model:
             quantize_(text_encoder, float8_weight_only())
             text_encoder.to("cpu")
             quantize_(transformer, float8_weight_only())
             transformer.to("cpu")
             torch.cuda.empty_cache()
         pipe = SkyreelsVideoPipeline.from_pretrained(
             base_model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch.bfloat16
         ).to("cpu")
         """Initializes the model and moves it to the GPU."""
         if self.is_initialized:
             return
         if not torch.cuda.is_available():
             raise RuntimeError("CUDA is not available. Cannot initialize model.")
         self.gpu_device = "cuda:0"
         self.pipe = self._load_model(model_id=self.model_id, quant_model=self.quant_model)
         if self.is_offload:
+            pass
         else:
             self.pipe.to(self.gpu_device)
         if self.offload_config.compiler_transformer:
             torch._dynamo.config.suppress_errors = True
             os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"
     def warm_up(self):
         if not self.is_initialized:
             raise RuntimeError("Model must be initialized before warm-up.")
         init_kwargs = {
             "prompt": "A woman is dancing in a room",
             "height": 544,
         result = self.pipe(**kwargs).frames  # Return the tensor directly
         return result
 _predictor = None
 @spaces.GPU(duration=90)
 def generate_video(prompt: str, seed: int, image: str = None) -> tuple[str, dict]:
     """Generates a video based on the given prompt and seed.
         A tuple containing the path to the generated video and the parameters used.
     """
     global _predictor
     if seed == -1:
         random.seed()
         seed = int(random.randrange(4294967294))
     if image is None:
         task_type = TaskType.T2V
         model_id = "Skywork/SkyReels-V1-Hunyuan-T2V"
             "guidance_scale": 6.0,
             "embedded_guidance_scale": 1.0,
             "negative_prompt": "Aerial view, low quality, bad hands",
+            "cfg_for": False,
         }
     if _predictor is None:
         _predictor = SkyReelsVideoSingleGpuInfer(
             task_type=task_type,
         )
         _predictor.initialize()
         logger.info("Predictor initialized")
     with torch.no_grad():
+        output = _predictor.infer(**kwargs)
     output = (output.numpy() * 255).astype(np.uint8)
     # Correct Transpose: (1, C, T, H, W) -> (1, T, H, W, C)
     output = output.transpose(0, 2, 3, 4, 1)
+    output = output[0]  # Remove batch dimension: (T, H, W, C)
     save_dir = f"./result"
     os.makedirs(save_dir, exist_ok=True)
     video_out_file = f"{save_dir}/{seed}.mp4"
     export_to_video(output, video_out_file, fps=24)
     return video_out_file, kwargs
 def create_gradio_interface():
     with gr.Blocks() as demo:
         with gr.Row():
         )
     return demo
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.queue().launch()