Spaces:
Running
on
Zero
Running
on
Zero
adjustable cfg
Browse files
app.py
CHANGED
@@ -25,6 +25,8 @@ pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device,
|
|
25 |
MAX_SEED = np.iinfo(np.int16).max
|
26 |
DEFAULT_POSITIVE_PROMPT = None
|
27 |
DEFAULT_NEGATIVE_PROMPT = None
|
|
|
|
|
28 |
|
29 |
def _ensure_pil(x):
|
30 |
"""Ensure returned image is a PIL.Image.Image."""
|
@@ -36,11 +38,12 @@ def _ensure_pil(x):
|
|
36 |
if isinstance(x, np.ndarray):
|
37 |
if x.dtype != np.uint8:
|
38 |
x = (x * 255.0).clip(0, 255).astype(np.uint8)
|
39 |
-
if x.ndim == 3 and x.shape[0] in (1,3,4): # CHW -> HWC
|
40 |
x = np.moveaxis(x, 0, -1)
|
41 |
return Image.fromarray(x)
|
42 |
raise TypeError("Unsupported image type returned by pipeline.")
|
43 |
|
|
|
44 |
@spaces.GPU(duration=300)
|
45 |
def infer(
|
46 |
prompt=None,
|
@@ -48,6 +51,7 @@ def infer(
|
|
48 |
width=512,
|
49 |
height=512,
|
50 |
num_inference_steps=28,
|
|
|
51 |
positive_prompt=DEFAULT_POSITIVE_PROMPT,
|
52 |
negative_prompt=DEFAULT_NEGATIVE_PROMPT,
|
53 |
progress=gr.Progress(track_tqdm=True),
|
@@ -64,7 +68,7 @@ def infer(
|
|
64 |
num_images_per_caption=1,
|
65 |
positive_prompt=positive_prompt,
|
66 |
negative_prompt=negative_prompt,
|
67 |
-
cfg=
|
68 |
cfg_img=1.0,
|
69 |
cfg_schedule="constant",
|
70 |
use_norm=False,
|
@@ -76,6 +80,7 @@ def infer(
|
|
76 |
|
77 |
return _ensure_pil(imgs[0]) # Return raw output exactly as generated
|
78 |
|
|
|
79 |
css = """
|
80 |
#col-container {
|
81 |
margin: 0 auto;
|
@@ -85,7 +90,7 @@ css = """
|
|
85 |
|
86 |
with gr.Blocks(css=css) as demo:
|
87 |
with gr.Column(elem_id="col-container"):
|
88 |
-
gr.Markdown("# NextStep-1-Large —
|
89 |
|
90 |
with gr.Row():
|
91 |
prompt = gr.Text(
|
@@ -144,6 +149,14 @@ with gr.Blocks(css=css) as demo:
|
|
144 |
step=64,
|
145 |
value=512,
|
146 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
with gr.Row():
|
149 |
result_1 = gr.Image(
|
@@ -154,25 +167,66 @@ with gr.Blocks(css=css) as demo:
|
|
154 |
format="png",
|
155 |
)
|
156 |
|
157 |
-
# Click & Fill Examples (all <=512px)
|
158 |
examples = [
|
159 |
[
|
160 |
-
"
|
161 |
-
|
162 |
-
"photorealistic,
|
163 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
],
|
165 |
[
|
166 |
-
"
|
167 |
-
|
168 |
-
"
|
169 |
-
"washed out,
|
170 |
],
|
171 |
[
|
172 |
-
"
|
173 |
-
|
174 |
-
"
|
175 |
-
"
|
176 |
],
|
177 |
]
|
178 |
|
@@ -184,6 +238,7 @@ with gr.Blocks(css=css) as demo:
|
|
184 |
width,
|
185 |
height,
|
186 |
num_inference_steps,
|
|
|
187 |
positive_prompt,
|
188 |
negative_prompt,
|
189 |
],
|
@@ -202,6 +257,7 @@ with gr.Blocks(css=css) as demo:
|
|
202 |
width,
|
203 |
height,
|
204 |
num_inference_steps,
|
|
|
205 |
positive_prompt,
|
206 |
negative_prompt,
|
207 |
],
|
|
|
25 |
MAX_SEED = np.iinfo(np.int16).max
|
26 |
DEFAULT_POSITIVE_PROMPT = None
|
27 |
DEFAULT_NEGATIVE_PROMPT = None
|
28 |
+
DEFAULT_CFG = 7.5
|
29 |
+
|
30 |
|
31 |
def _ensure_pil(x):
|
32 |
"""Ensure returned image is a PIL.Image.Image."""
|
|
|
38 |
if isinstance(x, np.ndarray):
|
39 |
if x.dtype != np.uint8:
|
40 |
x = (x * 255.0).clip(0, 255).astype(np.uint8)
|
41 |
+
if x.ndim == 3 and x.shape[0] in (1, 3, 4): # CHW -> HWC
|
42 |
x = np.moveaxis(x, 0, -1)
|
43 |
return Image.fromarray(x)
|
44 |
raise TypeError("Unsupported image type returned by pipeline.")
|
45 |
|
46 |
+
|
47 |
@spaces.GPU(duration=300)
|
48 |
def infer(
|
49 |
prompt=None,
|
|
|
51 |
width=512,
|
52 |
height=512,
|
53 |
num_inference_steps=28,
|
54 |
+
cfg=DEFAULT_CFG,
|
55 |
positive_prompt=DEFAULT_POSITIVE_PROMPT,
|
56 |
negative_prompt=DEFAULT_NEGATIVE_PROMPT,
|
57 |
progress=gr.Progress(track_tqdm=True),
|
|
|
68 |
num_images_per_caption=1,
|
69 |
positive_prompt=positive_prompt,
|
70 |
negative_prompt=negative_prompt,
|
71 |
+
cfg=float(cfg),
|
72 |
cfg_img=1.0,
|
73 |
cfg_schedule="constant",
|
74 |
use_norm=False,
|
|
|
80 |
|
81 |
return _ensure_pil(imgs[0]) # Return raw output exactly as generated
|
82 |
|
83 |
+
|
84 |
css = """
|
85 |
#col-container {
|
86 |
margin: 0 auto;
|
|
|
90 |
|
91 |
with gr.Blocks(css=css) as demo:
|
92 |
with gr.Column(elem_id="col-container"):
|
93 |
+
gr.Markdown("# NextStep-1-Large — Image generation")
|
94 |
|
95 |
with gr.Row():
|
96 |
prompt = gr.Text(
|
|
|
149 |
step=64,
|
150 |
value=512,
|
151 |
)
|
152 |
+
cfg = gr.Slider(
|
153 |
+
label="CFG (guidance scale)",
|
154 |
+
minimum=0.0,
|
155 |
+
maximum=20.0,
|
156 |
+
step=0.5,
|
157 |
+
value=DEFAULT_CFG,
|
158 |
+
info="Higher = closer to text, lower = more creative",
|
159 |
+
)
|
160 |
|
161 |
with gr.Row():
|
162 |
result_1 = gr.Image(
|
|
|
167 |
format="png",
|
168 |
)
|
169 |
|
|
|
170 |
examples = [
|
171 |
[
|
172 |
+
"Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
|
173 |
+
101, 512, 512, 32, 7.5,
|
174 |
+
"photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
|
175 |
+
"over-smoothed skin, plastic look, extra limbs, watermark",
|
176 |
+
],
|
177 |
+
[
|
178 |
+
"Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
|
179 |
+
202, 512, 384, 30, 8.5,
|
180 |
+
"isometric view, clean lines, stylized, warm ambience, detailed furniture",
|
181 |
+
"text, logo, watermark, perspective distortion",
|
182 |
+
],
|
183 |
+
[
|
184 |
+
"Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
|
185 |
+
303, 512, 320, 28, 7.0,
|
186 |
+
"cinematic, volumetric light, natural colors, high dynamic range",
|
187 |
+
"over-saturated, haze artifacts, blown highlights",
|
188 |
+
],
|
189 |
+
[
|
190 |
+
"Cute red panda astronaut sticker, chibi style, white background",
|
191 |
+
404, 384, 384, 24, 9.0,
|
192 |
+
"vector look, bold outlines, high contrast, die-cut silhouette",
|
193 |
+
"background clutter, drop shadow, gradients, text",
|
194 |
+
],
|
195 |
+
[
|
196 |
+
"Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
|
197 |
+
505, 512, 384, 28, 7.0,
|
198 |
+
"clean backdrop, realistic reflections, subtle bloom, high detail",
|
199 |
+
"noise, fingerprints, text, label",
|
200 |
+
],
|
201 |
+
[
|
202 |
+
"Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
|
203 |
+
606, 512, 512, 22, 6.0,
|
204 |
+
"flat colors, minimal palette, crisp edges, balanced composition",
|
205 |
+
"photo realism, gradients, noisy texture",
|
206 |
+
],
|
207 |
+
[
|
208 |
+
"Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
|
209 |
+
707, 384, 512, 34, 7.0,
|
210 |
+
"textured canvas, visible brush strokes, dramatic sky, moody lighting",
|
211 |
+
"smooth digital look, airbrush, neon colors",
|
212 |
+
],
|
213 |
+
[
|
214 |
+
"Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
|
215 |
+
808, 512, 384, 30, 8.0,
|
216 |
+
"physically-based rendering, soft fog, realistic materials, scale figures",
|
217 |
+
"tilt, skew, warped geometry, chromatic aberration",
|
218 |
],
|
219 |
[
|
220 |
+
"Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
|
221 |
+
909, 512, 512, 32, 8.5,
|
222 |
+
"glowing tendrils, soft caustics, particles, high detail",
|
223 |
+
"washed out, murky, low contrast, extra heads",
|
224 |
],
|
225 |
[
|
226 |
+
"Line art coloring page of a city skyline with hot air balloons",
|
227 |
+
111, 512, 512, 18, 5.5,
|
228 |
+
"clean black outlines, uniform stroke weight, high contrast, no shading",
|
229 |
+
"gray fill, gradients, cross-hatching, text",
|
230 |
],
|
231 |
]
|
232 |
|
|
|
238 |
width,
|
239 |
height,
|
240 |
num_inference_steps,
|
241 |
+
cfg,
|
242 |
positive_prompt,
|
243 |
negative_prompt,
|
244 |
],
|
|
|
257 |
width,
|
258 |
height,
|
259 |
num_inference_steps,
|
260 |
+
cfg,
|
261 |
positive_prompt,
|
262 |
negative_prompt,
|
263 |
],
|