# SkyReels_L / app.py
import spaces
import gradio as gr
import sys
import time
import os
import random
# Make the SkyReels-V1 modules importable. This assumes app.py sits at the
# root of the cloned/forked repo, as it does on the Hugging Face Space.
sys.path.append(".")
from skyreelsinfer import TaskType
from skyreelsinfer.offload import OffloadConfig
from skyreelsinfer.skyreels_video_infer import SkyReelsVideoInfer
from diffusers.utils import export_to_video
from diffusers.utils import load_image
import torch
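# Force full-precision FP32 matmuls for reproducible output: disable TF32 and
# the reduced-precision FP16/BF16 reductions in cuBLAS/cuDNN, and request the
# highest float32 matmul precision.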
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
torch.set_float32_matmul_precision("highest")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
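# Note: on a ZeroGPU Space the CUDA device is only attached inside functions
# decorated with @spaces.GPU, so this module-level check may report "cpu".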
# --- Model Loading ---
predictor = None  # Global predictor, initialized lazily by the "Load Model" button
def get_transformer_model_id(task_type: str) -> str:
return "Skywork/SkyReels-V1-Hunyuan-I2V" if task_type == "i2v" else "Skywork/SkyReels-V1-Hunyuan-T2V"
@spaces.GPU(duration=90)
def init_predictor(task_type: str):
global predictor
try:
predictor = SkyReelsVideoInfer(
task_type=TaskType.I2V if task_type == "i2v" else TaskType.T2V,
model_id=get_transformer_model_id(task_type),
            quant_model=True,  # Quantize to reduce the memory footprint
            world_size=1,  # Single-process inference; no multi-GPU parallelism
            is_offload=True,  # Offload idle weights to CPU to save GPU memory
offload_config=OffloadConfig(
high_cpu_memory=True,
parameters_level=True,
compiler_transformer=False, # Consider setting to True if compatible
)
)
        # Move the pipeline to CPU after loading so it does not pin GPU memory
        # between calls; guard in case the predictor exposes no pipe or to().
        if hasattr(predictor, "pipe") and hasattr(predictor.pipe, "to"):
            predictor.pipe.to("cpu")
return "Model loaded successfully!"
except Exception as e:
return f"Error loading model: {e}"
@spaces.GPU(duration=90)
def generate_video(prompt, seed, image=None, task_type=None):
global predictor
# Input Type Validation
if task_type == "i2v" and not isinstance(image, str):
return "Error: For i2v, please provide a valid image file path.", "{}"
if not isinstance(prompt, str) or not isinstance(seed, (int, float)):
return "Error: Invalid input types for prompt or seed.", "{}"
if seed == -1:
random.seed(time.time())
seed = int(random.randrange(4294967294))
kwargs = {
"prompt": prompt,
"height": 512, # Consider reducing for faster processing on CPU
"width": 512, # Consider reducing for faster processing on CPU
"num_frames": 97, # Consider reducing for faster processing on CPU
"num_inference_steps": 30, # Consider reducing for faster processing
"seed": int(seed), #make sure seed is int
"guidance_scale": 6.0,
"embedded_guidance_scale": 1.0,
"negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
"cfg_for": False,
}
if task_type == "i2v":
if image is None or not os.path.exists(image):
return "Error: Image not provided or not found.", "{}"
try:
kwargs["image"] = load_image(image=image)
except Exception as e:
return f"Error loading image: {e}", "{}"
try:
        # Ensure the model was loaded via the "Load Model" button
        if predictor is None:
            return "Error: Model not initialized. Please click 'Load Model' first.", "{}"
output = predictor.inference(kwargs)
save_dir = f"./result/{task_type}"
os.makedirs(save_dir, exist_ok=True)
        # Name the output file after the sanitized prompt and the seed
        video_out_file = f"{save_dir}/{prompt[:100].replace('/', '')}_{int(seed)}.mp4"
print(f"Generating video, local path: {video_out_file}")
export_to_video(output, video_out_file, fps=24)
return video_out_file, str(kwargs) # Return kwargs as a string
except Exception as e:
return f"Error during video generation: {e}", "{}"
# --- Gradio Interface ---
# A single Blocks layout handles both i2v and t2v.
with gr.Blocks() as demo:
with gr.Row():
task_type_dropdown = gr.Dropdown(
choices=["i2v", "t2v"], label="Task Type", value="t2v"
) # Default to t2v
load_model_button = gr.Button("Load Model")
model_status = gr.Textbox(label="Model Status")
with gr.Row():
with gr.Column(): # Use Columns for better layout
prompt = gr.Textbox(label="Input Prompt")
seed = gr.Number(label="Random Seed", value=-1)
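            # With type="filepath", Gradio passes the upload as a path string,
            # matching the isinstance(image, str) check in generate_video.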
image = gr.Image(label="Upload Image (for i2v)", type="filepath")
submit_button = gr.Button("Generate Video")
with gr.Column():
output_video = gr.Video(label="Generated Video")
output_params = gr.Textbox(label="Output Parameters")
# Load Model Button Logic
load_model_button.click(
fn=init_predictor,
inputs=[task_type_dropdown],
outputs=[model_status]
)
# Submit Button Logic (Handles both i2v and t2v)
submit_button.click(
fn=generate_video,
inputs=[prompt, seed, image, task_type_dropdown], # Include task_type
outputs=[output_video, output_params],
)
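    # For t2v, the image input is simply ignored by generate_video.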
# --- Launch the App ---
# Hugging Face Spaces runs app.py directly, so no CLI argument parsing is
# needed; demo.launch() with default arguments is sufficient.
demo.launch()
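# To run locally outside Spaces, assuming the SkyReels-V1 repo and its
# dependencies are installed: run `python app.py` and open the printed URL.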