Spaces:

bmarci
/

NextStep-1-Large

Running on Zero

App Files Community

bmarci committed on Aug 23

Commit

35c1a87

1 Parent(s): 18907bb

adjustable cfg

Browse files

Files changed (1) hide show

app.py +72 -16

app.py CHANGED Viewed

@@ -25,6 +25,8 @@ pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device,
 MAX_SEED = np.iinfo(np.int16).max
 DEFAULT_POSITIVE_PROMPT = None
 DEFAULT_NEGATIVE_PROMPT = None
 def _ensure_pil(x):
     """Ensure returned image is a PIL.Image.Image."""
@@ -36,11 +38,12 @@ def _ensure_pil(x):
     if isinstance(x, np.ndarray):
         if x.dtype != np.uint8:
             x = (x * 255.0).clip(0, 255).astype(np.uint8)
-        if x.ndim == 3 and x.shape[0] in (1,3,4):  # CHW -> HWC
             x = np.moveaxis(x, 0, -1)
         return Image.fromarray(x)
     raise TypeError("Unsupported image type returned by pipeline.")
 @spaces.GPU(duration=300)
 def infer(
     prompt=None,
@@ -48,6 +51,7 @@ def infer(
     width=512,
     height=512,
     num_inference_steps=28,
     positive_prompt=DEFAULT_POSITIVE_PROMPT,
     negative_prompt=DEFAULT_NEGATIVE_PROMPT,
     progress=gr.Progress(track_tqdm=True),
@@ -64,7 +68,7 @@ def infer(
             num_images_per_caption=1,
             positive_prompt=positive_prompt,
             negative_prompt=negative_prompt,
-            cfg=7.5,
             cfg_img=1.0,
             cfg_schedule="constant",
             use_norm=False,
@@ -76,6 +80,7 @@ def infer(
     return _ensure_pil(imgs[0])  # Return raw output exactly as generated
 css = """
 #col-container {
     margin: 0 auto;
@@ -85,7 +90,7 @@ css = """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# NextStep-1-Large — Exact Output Size")
         with gr.Row():
             prompt = gr.Text(
@@ -144,6 +149,14 @@ with gr.Blocks(css=css) as demo:
                         step=64,
                         value=512,
                     )
         with gr.Row():
             result_1 = gr.Image(
@@ -154,25 +167,66 @@ with gr.Blocks(css=css) as demo:
                 format="png",
             )
-        # Click & Fill Examples (all <=512px)
         examples = [
             [
-                "A cozy wooden cabin by a frozen lake, northern lights in the sky",
-                123, 512, 512, 28,
-                "photorealistic, cinematic lighting, starry night, glowing reflections",
-                "low-res, distorted, extra objects"
             ],
             [
-                "Futuristic city skyline at sunset, flying cars, neon reflections",
-                456, 512, 384, 30,
-                "detailed, vibrant, cinematic, sharp edges",
-                "washed out, cartoon, blurry"
             ],
             [
-                "Close-up of a rare orchid in a greenhouse with soft morning light",
-                789, 384, 512, 32,
-                "macro lens effect, ultra-detailed petals, dew drops",
-                "grainy, noisy, oversaturated"
             ],
         ]
@@ -184,6 +238,7 @@ with gr.Blocks(css=css) as demo:
                 width,
                 height,
                 num_inference_steps,
                 positive_prompt,
                 negative_prompt,
             ],
@@ -202,6 +257,7 @@ with gr.Blocks(css=css) as demo:
             width,
             height,
             num_inference_steps,
             positive_prompt,
             negative_prompt,
         ],

 MAX_SEED = np.iinfo(np.int16).max
 DEFAULT_POSITIVE_PROMPT = None
 DEFAULT_NEGATIVE_PROMPT = None
+DEFAULT_CFG = 7.5
 def _ensure_pil(x):
     """Ensure returned image is a PIL.Image.Image."""
     if isinstance(x, np.ndarray):
         if x.dtype != np.uint8:
             x = (x * 255.0).clip(0, 255).astype(np.uint8)
+        if x.ndim == 3 and x.shape[0] in (1, 3, 4):  # CHW -> HWC
             x = np.moveaxis(x, 0, -1)
         return Image.fromarray(x)
     raise TypeError("Unsupported image type returned by pipeline.")
 @spaces.GPU(duration=300)
 def infer(
     prompt=None,
     width=512,
     height=512,
     num_inference_steps=28,
+    cfg=DEFAULT_CFG,
     positive_prompt=DEFAULT_POSITIVE_PROMPT,
     negative_prompt=DEFAULT_NEGATIVE_PROMPT,
     progress=gr.Progress(track_tqdm=True),
             num_images_per_caption=1,
             positive_prompt=positive_prompt,
             negative_prompt=negative_prompt,
+            cfg=float(cfg),
             cfg_img=1.0,
             cfg_schedule="constant",
             use_norm=False,
     return _ensure_pil(imgs[0])  # Return raw output exactly as generated
 css = """
 #col-container {
     margin: 0 auto;
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# NextStep-1-Large — Image generation")
         with gr.Row():
             prompt = gr.Text(
                         step=64,
                         value=512,
                     )
+                cfg = gr.Slider(
+                    label="CFG (guidance scale)",
+                    minimum=0.0,
+                    maximum=20.0,
+                    step=0.5,
+                    value=DEFAULT_CFG,
+                    info="Higher = closer to text, lower = more creative",
+                )
         with gr.Row():
             result_1 = gr.Image(
                 format="png",
             )
         examples = [
             [
+                "Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
+                101, 512, 512, 32, 7.5,
+                "photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
+                "over-smoothed skin, plastic look, extra limbs, watermark",
+            ],
+            [
+                "Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
+                202, 512, 384, 30, 8.5,
+                "isometric view, clean lines, stylized, warm ambience, detailed furniture",
+                "text, logo, watermark, perspective distortion",
+            ],
+            [
+                "Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
+                303, 512, 320, 28, 7.0,
+                "cinematic, volumetric light, natural colors, high dynamic range",
+                "over-saturated, haze artifacts, blown highlights",
+            ],
+            [
+                "Cute red panda astronaut sticker, chibi style, white background",
+                404, 384, 384, 24, 9.0,
+                "vector look, bold outlines, high contrast, die-cut silhouette",
+                "background clutter, drop shadow, gradients, text",
+            ],
+            [
+                "Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
+                505, 512, 384, 28, 7.0,
+                "clean backdrop, realistic reflections, subtle bloom, high detail",
+                "noise, fingerprints, text, label",
+            ],
+            [
+                "Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
+                606, 512, 512, 22, 6.0,
+                "flat colors, minimal palette, crisp edges, balanced composition",
+                "photo realism, gradients, noisy texture",
+            ],
+            [
+                "Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
+                707, 384, 512, 34, 7.0,
+                "textured canvas, visible brush strokes, dramatic sky, moody lighting",
+                "smooth digital look, airbrush, neon colors",
+            ],
+            [
+                "Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
+                808, 512, 384, 30, 8.0,
+                "physically-based rendering, soft fog, realistic materials, scale figures",
+                "tilt, skew, warped geometry, chromatic aberration",
             ],
             [
+                "Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
+                909, 512, 512, 32, 8.5,
+                "glowing tendrils, soft caustics, particles, high detail",
+                "washed out, murky, low contrast, extra heads",
             ],
             [
+                "Line art coloring page of a city skyline with hot air balloons",
+                111, 512, 512, 18, 5.5,
+                "clean black outlines, uniform stroke weight, high contrast, no shading",
+                "gray fill, gradients, cross-hatching, text",
             ],
         ]
                 width,
                 height,
                 num_inference_steps,
+                cfg,
                 positive_prompt,
                 negative_prompt,
             ],
             width,
             height,
             num_inference_steps,
+            cfg,
             positive_prompt,
             negative_prompt,
         ],