import time

import spaces
import torch
import gradio as gr
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

# Load the model once at process startup. Sequential CPU offload plus VAE
# tiling/slicing keeps peak VRAM low enough for a shared-GPU Space.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX1.5-5B-I2V",
    torch_dtype=torch.bfloat16,
)
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

# (image_path, prompt) pairs shown as clickable examples under the inputs.
# BUGFIX: the original code passed an undefined name `examples` to
# gr.Examples, which raised NameError at import time. Populate this list
# with real asset paths to surface examples in the UI.
examples: list[list[str]] = []


@spaces.GPU(duration=120)
def generate_video(image, prompt):
    """Generate a short video from an input image and a text prompt.

    Args:
        image: Filepath of the uploaded input image (Gradio ``type="filepath"``).
        prompt: Text description of the desired video motion/content.

    Returns:
        Path to the generated ``.mp4`` file.

    Raises:
        gr.Error: If no image was uploaded or the prompt is empty.
    """
    if image is None:
        raise gr.Error("Please upload an input image")
    if not prompt:
        raise gr.Error("Please enter a text prompt")

    # BUGFIX: the original called pipe.to('cuda') here. After
    # enable_sequential_cpu_offload() the accelerate hooks own device
    # placement, and diffusers raises a ValueError if .to() is called on an
    # offloaded pipeline — so the explicit move is removed.

    input_image = load_image(image)

    # Fixed seed so the same (image, prompt) pair reproduces the same video.
    video_frames = pipe(
        prompt=prompt,
        image=input_image,
        num_videos_per_prompt=1,
        num_inference_steps=50,
        num_frames=81,
        guidance_scale=6,
        generator=torch.Generator(device="cuda").manual_seed(42),
    ).frames[0]

    # Timestamped filename avoids collisions between concurrent requests.
    output_path = f"output_{int(time.time())}.mp4"
    export_to_video(video_frames, output_path, fps=8)
    return output_path


with gr.Blocks(title="CogVideoX Image-to-Video") as demo:
    gr.Markdown("# 🎥 CogVideoX Image-to-Video Generation")
    gr.Markdown("Transform images into videos using AI! Upload an image and enter a description to generate a video.")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Input Image", type="filepath")
            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe the video you want to generate...")
            submit_btn = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    # Only render the examples component when there is something to show;
    # gr.Examples rejects an empty example list.
    if examples:
        gr.Examples(examples=examples, inputs=[image_input, prompt_input])

    submit_btn.click(
        fn=generate_video,
        inputs=[image_input, prompt_input],
        outputs=video_output,
    )

if __name__ == "__main__":
    demo.launch()