# Page header captured with the source: "Spaces: Paused".
# Module preamble: imports, torch numeric configuration, and shared globals.
# NOTE(review): import order is preserved from the original; `spaces` stays
# first and the `skyreelsinfer` imports stay after the sys.path tweak.
import spaces
import gradio as gr
import argparse  # Kept from the original; not used by the visible code.
import sys
import time
import os
import random

# VERY IMPORTANT: Add the SkyReels-V1 root directory to the Python path.
# Assumes app.py lives in the root of the cloned/forked repo.
sys.path.append(".")  # Correct path for Hugging Face Space

from skyreelsinfer import TaskType
from skyreelsinfer.offload import OffloadConfig
from skyreelsinfer.skyreels_video_infer import SkyReelsVideoInfer
from diffusers.utils import export_to_video
from diffusers.utils import load_image
import torch

# Turn off every reduced-precision shortcut (TF32, bf16/fp16 reduction) and
# request the highest float32 matmul precision.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
torch.backends.cudnn.allow_tf32 = False
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
torch.set_float32_matmul_precision("highest")

# First CUDA device when available, otherwise CPU.
# NOTE(review): `device` is not referenced by the rest of the visible code.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# --- Model Loading ---
# Global predictor; stays None until init_predictor() succeeds.
predictor = None
def get_transformer_model_id(task_type: str) -> str:
    """Return the Hugging Face model id matching *task_type*.

    Args:
        task_type: ``"i2v"`` selects the I2V checkpoint; every other value
            (including ``"t2v"``) falls back to the T2V checkpoint.

    Returns:
        The repository id of the SkyReels-V1 Hunyuan transformer to load.
    """
    if task_type == "i2v":
        return "Skywork/SkyReels-V1-Hunyuan-I2V"
    return "Skywork/SkyReels-V1-Hunyuan-T2V"
def init_predictor(task_type: str):
    """Build the predictor for *task_type* and publish it as the global.

    The new predictor is assembled in a local variable and only assigned to
    the module-level ``predictor`` once every setup step has succeeded, so a
    failure never leaves a half-initialised object visible to
    ``generate_video``; a previously loaded predictor stays in place.

    Args:
        task_type: ``"i2v"`` for the I2V task; any other value selects T2V.

    Returns:
        A status string for the UI: ``"Model loaded successfully!"`` on
        success, otherwise ``"Error loading model: <reason>"``. This function
        does not raise for ordinary ``Exception`` subclasses.
    """
    global predictor
    try:
        candidate = SkyReelsVideoInfer(
            task_type=TaskType.I2V if task_type == "i2v" else TaskType.T2V,
            model_id=get_transformer_model_id(task_type),
            quant_model=True,  # Original note: keep quantization for smaller model size.
            world_size=1,  # Original note: must be 1 for CPU.
            is_offload=True,  # Original note: keep offload for CPU.
            offload_config=OffloadConfig(
                high_cpu_memory=True,
                parameters_level=True,
                compiler_transformer=False,  # Original note: consider True if compatible.
            ),
        )
        # Explicitly move the pipeline to CPU when the predictor exposes one
        # with a ``to`` method.
        pipe = getattr(candidate, "pipe", None)
        if pipe is not None and hasattr(pipe, "to"):
            pipe.to("cpu")
    except Exception as e:
        # UI boundary: report the failure as a status string instead of raising.
        return f"Error loading model: {e}"

    predictor = candidate
    return "Model loaded successfully!"
def _prompt_to_file_stem(prompt, max_chars=100):
    """Return a filename-safe stem built from the first *max_chars* of *prompt*."""
    # Drop path separators and non-printable characters (NUL, newlines, other
    # control codes) so user text cannot break or redirect the output path.
    return "".join(
        ch for ch in prompt[:max_chars] if ch not in "/\\" and ch.isprintable()
    )


def generate_video(prompt, seed, image=None, task_type=None):
    """Run one inference with the loaded predictor and write the result as MP4.

    Args:
        prompt: Text prompt; must be a ``str``.
        seed: ``int`` or ``float``. ``-1`` requests a random seed; any other
            finite value is truncated with ``int()``.
        image: Path of the input image; required (and must exist) when
            ``task_type == "i2v"``.
        task_type: ``"i2v"`` adds the image to the request; the value is also
            used as the sub-directory of ``./result``.

    Returns:
        A 2-tuple of strings. On success: ``(video_path, str(kwargs))``.
        On failure: ``(error_message, "{}")``.

        NOTE(review): the error message travels in the first slot, which the
        UI wires to a ``gr.Video`` output — confirm that component copes with
        a non-path string, or route errors elsewhere.
    """
    # Input type validation.
    if task_type == "i2v" and not isinstance(image, str):
        return "Error: For i2v, please provide a valid image file path.", "{}"
    if not isinstance(prompt, str) or not isinstance(seed, (int, float)):
        return "Error: Invalid input types for prompt or seed.", "{}"

    if seed == -1:
        # SystemRandom neither depends on nor disturbs the process-wide
        # `random` state, which other code may have seeded deterministically.
        seed = random.SystemRandom().randrange(4294967294)
    try:
        seed = int(seed)
    except (ValueError, OverflowError):
        # NaN and +/-inf are floats but cannot be converted to an int seed.
        return "Error: Invalid input types for prompt or seed.", "{}"

    kwargs = {
        "prompt": prompt,
        "height": 512,  # Consider reducing for faster processing on CPU
        "width": 512,  # Consider reducing for faster processing on CPU
        "num_frames": 97,  # Consider reducing for faster processing on CPU
        "num_inference_steps": 30,  # Consider reducing for faster processing
        "seed": seed,
        "guidance_scale": 6.0,
        "embedded_guidance_scale": 1.0,
        "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
        "cfg_for": False,
    }

    if task_type == "i2v":
        # `image` is already known to be a str here (validated above).
        if not os.path.exists(image):
            return "Error: Image not provided or not found.", "{}"
        try:
            kwargs["image"] = load_image(image=image)
        except Exception as e:
            return f"Error loading image: {e}", "{}"

    # The predictor is only created by the "Load Model" button, so point the
    # user there rather than at a Space reload.
    if predictor is None:
        return "Error: Model not initialized. Please click 'Load Model' first.", "{}"

    try:
        output = predictor.inference(kwargs)
        save_dir = f"./result/{task_type}"
        os.makedirs(save_dir, exist_ok=True)
        video_out_file = f"{save_dir}/{_prompt_to_file_stem(prompt)}_{seed}.mp4"
        print(f"Generating video, local path: {video_out_file}")
        export_to_video(output, video_out_file, fps=24)
        return video_out_file, str(kwargs)  # Parameters are echoed as a string.
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error during video generation: {e}", "{}"
# --- Gradio Interface ---
# A single interface handles BOTH i2v and t2v.
# NOTE(review): the source reached review with its indentation stripped; the
# nesting below (controls row, then an input column beside an output column)
# is a reconstruction — confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        task_type_dropdown = gr.Dropdown(
            choices=["i2v", "t2v"], label="Task Type", value="t2v"
        )  # Default to t2v
        load_model_button = gr.Button("Load Model")
        model_status = gr.Textbox(label="Model Status")

    with gr.Row():
        with gr.Column():  # Inputs
            prompt = gr.Textbox(label="Input Prompt")
            seed = gr.Number(label="Random Seed", value=-1)
            image = gr.Image(label="Upload Image (for i2v)", type="filepath")
            submit_button = gr.Button("Generate Video")
        with gr.Column():  # Outputs
            output_video = gr.Video(label="Generated Video")
            output_params = gr.Textbox(label="Output Parameters")

    # Load Model button: builds the predictor for the selected task type and
    # shows the returned status string.
    load_model_button.click(
        fn=init_predictor,
        inputs=[task_type_dropdown],
        outputs=[model_status],
    )

    # Generate button: handles both i2v and t2v; the task type is passed as
    # the fourth positional argument of generate_video.
    submit_button.click(
        fn=generate_video,
        inputs=[prompt, seed, image, task_type_dropdown],
        outputs=[output_video, output_params],
    )

# --- Launch the App ---
# argparse is not used to configure the launch.
# The original trailing comment advised against calling launch() here, which
# contradicted the call itself; the call is kept because nothing else in this
# file starts the app.
demo.launch()