irah23 committed on
Commit
6b6138e
·
1 Parent(s): 2208b08

update space

Browse files
Files changed (1) hide show
  1. app.py +40 -26
app.py CHANGED
@@ -9,49 +9,63 @@ import imageio
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  MAX_SEED = np.iinfo(np.int32).max
11
 
12
- # SDXL for image generation
13
  sdxl_model_id = "stabilityai/sdxl-turbo"
14
- image_pipe = DiffusionPipeline.from_pretrained(sdxl_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32)
15
- image_pipe = image_pipe.to(device)
 
 
16
 
17
- # Stable Video Diffusion for video generation
18
  svd_model_id = "stabilityai/stable-video-diffusion-img2vid"
19
- video_pipe = StableVideoDiffusionPipeline.from_pretrained(svd_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float32, variant="fp16" if device == "cuda" else None)
20
- video_pipe.enable_model_cpu_offload() if device == "cuda" else None
 
 
 
 
 
21
 
22
  def generate_video_from_text(prompt, seed=0, randomize_seed=True):
23
  if randomize_seed:
24
  seed = random.randint(0, MAX_SEED)
25
  generator = torch.Generator(device=device).manual_seed(seed)
26
 
27
- # Generate image from text
28
- image = image_pipe(prompt=prompt, generator=generator, guidance_scale=0.0, num_inference_steps=2, width=1024, height=1024).images[0]
 
 
 
 
 
 
 
29
 
30
- # Resize to 512x512
31
  image = image.resize((512, 512))
32
 
33
- # Generate video frames from image
34
- video_frames = video_pipe(image).frames[0] # list of PIL images
35
-
36
- # Convert to video (MP4)
37
  video_path = f"/tmp/generated_{seed}.mp4"
38
  imageio.mimsave(video_path, video_frames, fps=7)
39
 
40
  return video_path, image, seed
41
 
42
- with gr.Blocks() as demo:
43
- gr.Markdown("## Text to Video using SDXL + Stable Video Diffusion")
44
-
45
- with gr.Row():
46
- prompt = gr.Textbox(label="Prompt", placeholder="Describe your scene...")
47
- run_button = gr.Button("Generate")
48
-
49
- video_output = gr.Video(label="Generated Video")
50
- image_output = gr.Image(label="Generated Image")
51
- seed_output = gr.Number(label="Seed")
52
-
53
- run_button.click(fn=generate_video_from_text, inputs=[prompt], outputs=[video_output, image_output, seed_output])
 
 
54
 
 
55
  demo.api_name = "predict"
56
-
57
  demo.launch()
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  MAX_SEED = np.iinfo(np.int32).max
11
 
12
+ # Load SDXL for image generation
13
  sdxl_model_id = "stabilityai/sdxl-turbo"
14
+ image_pipe = DiffusionPipeline.from_pretrained(
15
+ sdxl_model_id,
16
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32
17
+ ).to(device)
18
 
19
+ # Load Stable Video Diffusion for video generation
20
  svd_model_id = "stabilityai/stable-video-diffusion-img2vid"
21
+ video_pipe = StableVideoDiffusionPipeline.from_pretrained(
22
+ svd_model_id,
23
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
24
+ variant="fp16" if device == "cuda" else None
25
+ )
26
+ if device == "cuda":
27
+ video_pipe.enable_model_cpu_offload()
28
 
29
  def generate_video_from_text(prompt, seed=0, randomize_seed=True):
30
  if randomize_seed:
31
  seed = random.randint(0, MAX_SEED)
32
  generator = torch.Generator(device=device).manual_seed(seed)
33
 
34
+ # Generate image
35
+ image = image_pipe(
36
+ prompt=prompt,
37
+ generator=generator,
38
+ guidance_scale=0.0,
39
+ num_inference_steps=2,
40
+ width=1024,
41
+ height=1024
42
+ ).images[0]
43
 
44
+ # Resize for SVD
45
  image = image.resize((512, 512))
46
 
47
+ # Generate video
48
+ video_frames = video_pipe(image).frames[0]
 
 
49
  video_path = f"/tmp/generated_{seed}.mp4"
50
  imageio.mimsave(video_path, video_frames, fps=7)
51
 
52
  return video_path, image, seed
53
 
54
+ # Use Interface instead of Blocks
55
+ demo = gr.Interface(
56
+ fn=generate_video_from_text,
57
+ inputs=[
58
+ gr.Textbox(label="Prompt", placeholder="Describe your scene..."),
59
+ gr.Number(label="Seed", value=0),
60
+ gr.Checkbox(label="Randomize Seed", value=True)
61
+ ],
62
+ outputs=[
63
+ gr.Video(label="Generated Video"),
64
+ gr.Image(label="Generated Image"),
65
+ gr.Number(label="Seed Used")
66
+ ]
67
+ )
68
 
69
+ # Expose endpoint
70
  demo.api_name = "predict"
 
71
  demo.launch()