Spaces:

bmarci
/

NextStep-1-Large

Running on Zero

App Files Files Community

bmarci committed on Aug 23

Commit

1feed0d

1 Parent(s): 35c1a87

More precise GPU allocation

Browse files

Files changed (1) hide show

app.py +158 -165

app.py CHANGED Viewed

@@ -44,23 +44,11 @@ def _ensure_pil(x):
     raise TypeError("Unsupported image type returned by pipeline.")
-@spaces.GPU(duration=300)
-def infer(
-    prompt=None,
-    seed=0,
-    width=512,
-    height=512,
-    num_inference_steps=28,
-    cfg=DEFAULT_CFG,
-    positive_prompt=DEFAULT_POSITIVE_PROMPT,
-    negative_prompt=DEFAULT_NEGATIVE_PROMPT,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """Run inference at exactly (width, height)."""
     if prompt in [None, ""]:
         gr.Warning("⚠️ Please enter a prompt!")
         return None
     with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
         imgs = pipeline.generate_image(
             prompt,
@@ -77,15 +65,107 @@ def infer(
             seed=int(seed),
             progress=True,
         )
-    return _ensure_pil(imgs[0])  # Return raw output exactly as generated
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 800px;
 }
 """
 with gr.Blocks(css=css) as demo:
@@ -93,178 +173,91 @@ with gr.Blocks(css=css) as demo:
         gr.Markdown("# NextStep-1-Large — Image generation")
         with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=2,
-                placeholder="Enter your prompt",
-                container=False,
-            )
             run_button = gr.Button("Run", scale=0, variant="primary")
             cancel_button = gr.Button("Cancel", scale=0, variant="secondary")
         with gr.Row():
             with gr.Accordion("Advanced Settings", open=True):
-                positive_prompt = gr.Text(
-                    label="Positive Prompt",
-                    show_label=True,
-                    max_lines=1,
-                    placeholder="Optional: add positives",
-                    container=True,
-                )
-                negative_prompt = gr.Text(
-                    label="Negative Prompt",
-                    show_label=True,
-                    max_lines=2,
-                    placeholder="Optional: add negatives",
-                    container=True,
-                )
                 with gr.Row():
-                    seed = gr.Slider(
-                        label="Seed",
-                        minimum=0,
-                        maximum=MAX_SEED,
-                        step=1,
-                        value=3407,
-                    )
-                    num_inference_steps = gr.Slider(
-                        label="Sampling steps",
-                        minimum=10,
-                        maximum=50,
-                        step=1,
-                        value=28,
-                    )
                 with gr.Row():
-                    width = gr.Slider(
-                        label="Width",
-                        minimum=256,
-                        maximum=512,
-                        step=64,
-                        value=512,
-                    )
-                    height = gr.Slider(
-                        label="Height",
-                        minimum=256,
-                        maximum=512,
-                        step=64,
-                        value=512,
-                    )
-                cfg = gr.Slider(
-                    label="CFG (guidance scale)",
-                    minimum=0.0,
-                    maximum=20.0,
-                    step=0.5,
-                    value=DEFAULT_CFG,
-                    info="Higher = closer to text, lower = more creative",
-                )
         with gr.Row():
-            result_1 = gr.Image(
-                label="Result",
-                show_label=True,
-                container=True,
-                interactive=False,
-                format="png",
-            )
         examples = [
             [
                 "Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
                 101, 512, 512, 32, 7.5,
                 "photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
-                "over-smoothed skin, plastic look, extra limbs, watermark",
-            ],
-            [
-                "Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
-                202, 512, 384, 30, 8.5,
-                "isometric view, clean lines, stylized, warm ambience, detailed furniture",
-                "text, logo, watermark, perspective distortion",
-            ],
-            [
-                "Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
-                303, 512, 320, 28, 7.0,
-                "cinematic, volumetric light, natural colors, high dynamic range",
-                "over-saturated, haze artifacts, blown highlights",
-            ],
-            [
-                "Cute red panda astronaut sticker, chibi style, white background",
-                404, 384, 384, 24, 9.0,
-                "vector look, bold outlines, high contrast, die-cut silhouette",
-                "background clutter, drop shadow, gradients, text",
-            ],
-            [
-                "Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
-                505, 512, 384, 28, 7.0,
-                "clean backdrop, realistic reflections, subtle bloom, high detail",
-                "noise, fingerprints, text, label",
-            ],
-            [
-                "Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
-                606, 512, 512, 22, 6.0,
-                "flat colors, minimal palette, crisp edges, balanced composition",
-                "photo realism, gradients, noisy texture",
-            ],
-            [
-                "Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
-                707, 384, 512, 34, 7.0,
-                "textured canvas, visible brush strokes, dramatic sky, moody lighting",
-                "smooth digital look, airbrush, neon colors",
-            ],
-            [
-                "Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
-                808, 512, 384, 30, 8.0,
-                "physically-based rendering, soft fog, realistic materials, scale figures",
-                "tilt, skew, warped geometry, chromatic aberration",
-            ],
-            [
-                "Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
-                909, 512, 512, 32, 8.5,
-                "glowing tendrils, soft caustics, particles, high detail",
-                "washed out, murky, low contrast, extra heads",
-            ],
-            [
-                "Line art coloring page of a city skyline with hot air balloons",
-                111, 512, 512, 18, 5.5,
-                "clean black outlines, uniform stroke weight, high contrast, no shading",
-                "gray fill, gradients, cross-hatching, text",
-            ],
         ]
         gr.Examples(
             examples=examples,
-            inputs=[
-                prompt,
-                seed,
-                width,
-                height,
-                num_inference_steps,
-                cfg,
-                positive_prompt,
-                negative_prompt,
-            ],
             label="Click & Fill Examples (Exact Size)",
         )
-    def show_result():
-        return gr.update(visible=True)
-    generation_event = gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            seed,
-            width,
-            height,
-            num_inference_steps,
-            cfg,
-            positive_prompt,
-            negative_prompt,
-        ],
-        outputs=[result_1],
-    )
-    cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[generation_event])
 if __name__ == "__main__":
-    demo.launch()

     raise TypeError("Unsupported image type returned by pipeline.")
+def infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress):
+    """Core inference logic without GPU decorators."""
     if prompt in [None, ""]:
         gr.Warning("⚠️ Please enter a prompt!")
         return None
     with autocast(device_type=("cuda" if device == "cuda" else "cpu"), dtype=torch.bfloat16):
         imgs = pipeline.generate_image(
             prompt,
             seed=int(seed),
             progress=True,
         )
+    return _ensure_pil(imgs[0])
+# Tier 1: very small images with few steps; js_dispatch routes here when pixels <= 256*256 and steps <= 20.
+@spaces.GPU(duration=90)  # smallest duration of the five tiers (presumably seconds -- confirm against the spaces docs)
+def infer_tiny(prompt=None, seed=0, width=512, height=512, num_inference_steps=24, cfg=DEFAULT_CFG,  # defaults only matter for direct calls; the button wiring passes all eight inputs
+               positive_prompt=DEFAULT_POSITIVE_PROMPT, negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+               progress=gr.Progress(track_tqdm=True)):
+    return infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress)  # thin wrapper: every argument is forwarded unchanged
+# Tier 2: small to medium images with standard steps; js_dispatch routes here when megapixels * steps < 5 (and Tier 1 did not match).
+@spaces.GPU(duration=150)  # second-smallest duration of the five tiers (presumably seconds -- confirm against the spaces docs)
+def infer_fast(prompt=None, seed=0, width=512, height=512, num_inference_steps=24, cfg=DEFAULT_CFG,  # defaults only matter for direct calls; the button wiring passes all eight inputs
+               positive_prompt=DEFAULT_POSITIVE_PROMPT, negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+               progress=gr.Progress(track_tqdm=True)):
+    return infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress)  # thin wrapper: every argument is forwarded unchanged
+# Tier 3: standard generation for most common cases; js_dispatch's default target, also chosen when 5 <= megapixels * steps < 8.
+@spaces.GPU(duration=200)  # middle duration of the five tiers (presumably seconds -- confirm against the spaces docs)
+def infer_std(prompt=None, seed=0, width=512, height=512, num_inference_steps=28, cfg=DEFAULT_CFG,  # defaults only matter for direct calls; the button wiring passes all eight inputs
+              positive_prompt=DEFAULT_POSITIVE_PROMPT, negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+              progress=gr.Progress(track_tqdm=True)):
+    return infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress)  # thin wrapper: every argument is forwarded unchanged
+# Tier 4: larger images or more steps; chosen when 8 <= megapixels * steps < 12, or forced when pixels >= 512*512 and 35 <= steps < 45.
+@spaces.GPU(duration=300)  # second-largest duration of the five tiers (presumably seconds -- confirm against the spaces docs)
+def infer_long(prompt=None, seed=0, width=512, height=512, num_inference_steps=36, cfg=DEFAULT_CFG,  # defaults only matter for direct calls; the button wiring passes all eight inputs
+               positive_prompt=DEFAULT_POSITIVE_PROMPT, negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+               progress=gr.Progress(track_tqdm=True)):
+    return infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress)  # thin wrapper: every argument is forwarded unchanged
+# Tier 5: maximum quality with many steps; chosen when megapixels * steps >= 12, or forced whenever steps >= 45.
+@spaces.GPU(duration=400)  # largest duration of the five tiers (presumably seconds -- confirm against the spaces docs)
+def infer_max(prompt=None, seed=0, width=512, height=512, num_inference_steps=45, cfg=DEFAULT_CFG,  # defaults only matter for direct calls; the button wiring passes all eight inputs
+              positive_prompt=DEFAULT_POSITIVE_PROMPT, negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+              progress=gr.Progress(track_tqdm=True)):
+    return infer_core(prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt, progress)  # thin wrapper: every argument is forwarded unchanged
+# Browser-side dispatcher (run on Run click / prompt submit): scores the request as megapixels * steps, picks a tier, and clicks that tier's hidden button by element id.
+js_dispatch = """
+function(width, height, steps){
+  const w = Number(width);
+  const h = Number(height);
+  const s = Number(steps);
+  // Calculate total pixels and complexity score
+  const pixels = w * h;
+  const megapixels = pixels / 1000000;
+  // Complexity score combines image size and steps
+  // Base: ~0.5 seconds per megapixel per step
+  const complexity = megapixels * s;
+  let target = 'btn-std';  // Default
+  // Select appropriate tier based on complexity
+  if (pixels <= 256*256 && s <= 20) {
+    // Very small images with few steps
+    target = 'btn-tiny';
+  } else if (complexity < 5) {
+    // Small images or few steps (e.g., 384x384 @ 24 steps = 3.5)
+    target = 'btn-fast';
+  } else if (complexity < 8) {
+    // Standard generation (e.g., 512x512 @ 28 steps = 7.3)
+    target = 'btn-std';
+  } else if (complexity < 12) {
+    // Larger or more steps (e.g., 512x512 @ 40 steps = 10.5)
+    target = 'btn-long';
+  } else {
+    // Maximum complexity
+    target = 'btn-max';
+  }
+  // Special cases: override based on extreme values
+  if (s >= 45) {
+    target = 'btn-max';  // Many steps always need more time
+  } else if (pixels >= 512*512 && s >= 35) {
+    target = 'btn-long';  // Large images with many steps
+  }
+  console.log(`Resolution: ${w}x${h}, Steps: ${s}, Complexity: ${complexity.toFixed(2)}, Selected: ${target}`);
+  const b = document.getElementById(target);
+  if (b) b.click();
+}
+"""
 # Page stylesheet passed to gr.Blocks: centers #col-container (max-width 800px) and force-hides the five dispatcher buttons.
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 800px;
 }
+/* Hide the dispatcher buttons */
+#btn-tiny, #btn-fast, #btn-std, #btn-long, #btn-max {
+    display: none !important;
+}
 """
 with gr.Blocks(css=css) as demo:
         gr.Markdown("# NextStep-1-Large — Image generation")
         with gr.Row():
+            prompt = gr.Text(label="Prompt", show_label=False, max_lines=2, placeholder="Enter your prompt",
+                             container=False)
             run_button = gr.Button("Run", scale=0, variant="primary")
             cancel_button = gr.Button("Cancel", scale=0, variant="secondary")
         with gr.Row():
             with gr.Accordion("Advanced Settings", open=True):
+                positive_prompt = gr.Text(label="Positive Prompt", show_label=True,
+                                          placeholder="Optional: add positives")
+                negative_prompt = gr.Text(label="Negative Prompt", show_label=True,
+                                          placeholder="Optional: add negatives")
                 with gr.Row():
+                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=3407)
+                    num_inference_steps = gr.Slider(label="Sampling steps", minimum=10, maximum=50, step=1, value=28)
                 with gr.Row():
+                    width = gr.Slider(label="Width", minimum=256, maximum=512, step=64, value=512)
+                    height = gr.Slider(label="Height", minimum=256, maximum=512, step=64, value=512)
+                cfg = gr.Slider(label="CFG (guidance scale)", minimum=0.0, maximum=20.0, step=0.5, value=DEFAULT_CFG,
+                                info="Higher = closer to text, lower = more creative")
         with gr.Row():
+            result_1 = gr.Image(label="Result", format="png", interactive=False)
+        # Hidden dispatcher buttons
+        with gr.Row(visible=False):
+            btn_tiny = gr.Button(visible=False, elem_id="btn-tiny")
+            btn_fast = gr.Button(visible=False, elem_id="btn-fast")
+            btn_std = gr.Button(visible=False, elem_id="btn-std")
+            btn_long = gr.Button(visible=False, elem_id="btn-long")
+            btn_max = gr.Button(visible=False, elem_id="btn-max")
         examples = [
             [
                 "Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
                 101, 512, 512, 32, 7.5,
                 "photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
+                "over-smoothed skin, plastic look, extra limbs, watermark"],
+            ["Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
+             202, 512, 384, 30, 8.5,
+             "isometric view, clean lines, stylized, warm ambience, detailed furniture",
+             "text, logo, watermark, perspective distortion"],
+            ["Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
+             303, 512, 320, 28, 7.0,
+             "cinematic, volumetric light, natural colors, high dynamic range",
+             "over-saturated, haze artifacts, blown highlights"],
+            ["Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
+             707, 384, 512, 34, 7.0,
+             "textured canvas, visible brush strokes, dramatic sky, moody lighting",
+             "smooth digital look, airbrush, neon colors"],
         ]
         gr.Examples(
             examples=examples,
+            inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt, negative_prompt],
             label="Click & Fill Examples (Exact Size)",
         )
+        # Wire up the dispatcher buttons to their respective functions
+        ev_tiny = btn_tiny.click(infer_tiny,
+                                 inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt,
+                                         negative_prompt],
+                                 outputs=[result_1])
+        ev_fast = btn_fast.click(infer_fast,
+                                 inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt,
+                                         negative_prompt],
+                                 outputs=[result_1])
+        ev_std = btn_std.click(infer_std,
+                               inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt,
+                                       negative_prompt],
+                               outputs=[result_1])
+        ev_long = btn_long.click(infer_long,
+                                 inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt,
+                                         negative_prompt],
+                                 outputs=[result_1])
+        ev_max = btn_max.click(infer_max,
+                               inputs=[prompt, seed, width, height, num_inference_steps, cfg, positive_prompt,
+                                       negative_prompt],
+                               outputs=[result_1])
+        # Trigger JS dispatcher on run button or prompt submit
+        run_button.click(None, inputs=[width, height, num_inference_steps], outputs=[], js=js_dispatch)
+        prompt.submit(None, inputs=[width, height, num_inference_steps], outputs=[], js=js_dispatch)
+        # Cancel button cancels all possible events
+        cancel_button.click(fn=None, inputs=None, outputs=None, cancels=[ev_tiny, ev_fast, ev_std, ev_long, ev_max])
 if __name__ == "__main__":
+    demo.launch()