bmarci committed on
Commit
35c1a87
·
1 Parent(s): 18907bb

adjustable cfg

Browse files
Files changed (1) hide show
  1. app.py +72 -16
app.py CHANGED
@@ -25,6 +25,8 @@ pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device=device,
25
  MAX_SEED = np.iinfo(np.int16).max
26
  DEFAULT_POSITIVE_PROMPT = None
27
  DEFAULT_NEGATIVE_PROMPT = None
 
 
28
 
29
  def _ensure_pil(x):
30
  """Ensure returned image is a PIL.Image.Image."""
@@ -36,11 +38,12 @@ def _ensure_pil(x):
36
  if isinstance(x, np.ndarray):
37
  if x.dtype != np.uint8:
38
  x = (x * 255.0).clip(0, 255).astype(np.uint8)
39
- if x.ndim == 3 and x.shape[0] in (1,3,4): # CHW -> HWC
40
  x = np.moveaxis(x, 0, -1)
41
  return Image.fromarray(x)
42
  raise TypeError("Unsupported image type returned by pipeline.")
43
 
 
44
  @spaces.GPU(duration=300)
45
  def infer(
46
  prompt=None,
@@ -48,6 +51,7 @@ def infer(
48
  width=512,
49
  height=512,
50
  num_inference_steps=28,
 
51
  positive_prompt=DEFAULT_POSITIVE_PROMPT,
52
  negative_prompt=DEFAULT_NEGATIVE_PROMPT,
53
  progress=gr.Progress(track_tqdm=True),
@@ -64,7 +68,7 @@ def infer(
64
  num_images_per_caption=1,
65
  positive_prompt=positive_prompt,
66
  negative_prompt=negative_prompt,
67
- cfg=7.5,
68
  cfg_img=1.0,
69
  cfg_schedule="constant",
70
  use_norm=False,
@@ -76,6 +80,7 @@ def infer(
76
 
77
  return _ensure_pil(imgs[0]) # Return raw output exactly as generated
78
 
 
79
  css = """
80
  #col-container {
81
  margin: 0 auto;
@@ -85,7 +90,7 @@ css = """
85
 
86
  with gr.Blocks(css=css) as demo:
87
  with gr.Column(elem_id="col-container"):
88
- gr.Markdown("# NextStep-1-Large — Exact Output Size")
89
 
90
  with gr.Row():
91
  prompt = gr.Text(
@@ -144,6 +149,14 @@ with gr.Blocks(css=css) as demo:
144
  step=64,
145
  value=512,
146
  )
 
 
 
 
 
 
 
 
147
 
148
  with gr.Row():
149
  result_1 = gr.Image(
@@ -154,25 +167,66 @@ with gr.Blocks(css=css) as demo:
154
  format="png",
155
  )
156
 
157
- # Click & Fill Examples (all <=512px)
158
  examples = [
159
  [
160
- "A cozy wooden cabin by a frozen lake, northern lights in the sky",
161
- 123, 512, 512, 28,
162
- "photorealistic, cinematic lighting, starry night, glowing reflections",
163
- "low-res, distorted, extra objects"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  ],
165
  [
166
- "Futuristic city skyline at sunset, flying cars, neon reflections",
167
- 456, 512, 384, 30,
168
- "detailed, vibrant, cinematic, sharp edges",
169
- "washed out, cartoon, blurry"
170
  ],
171
  [
172
- "Close-up of a rare orchid in a greenhouse with soft morning light",
173
- 789, 384, 512, 32,
174
- "macro lens effect, ultra-detailed petals, dew drops",
175
- "grainy, noisy, oversaturated"
176
  ],
177
  ]
178
 
@@ -184,6 +238,7 @@ with gr.Blocks(css=css) as demo:
184
  width,
185
  height,
186
  num_inference_steps,
 
187
  positive_prompt,
188
  negative_prompt,
189
  ],
@@ -202,6 +257,7 @@ with gr.Blocks(css=css) as demo:
202
  width,
203
  height,
204
  num_inference_steps,
 
205
  positive_prompt,
206
  negative_prompt,
207
  ],
 
25
  MAX_SEED = np.iinfo(np.int16).max
26
  DEFAULT_POSITIVE_PROMPT = None
27
  DEFAULT_NEGATIVE_PROMPT = None
28
+ DEFAULT_CFG = 7.5
29
+
30
 
31
  def _ensure_pil(x):
32
  """Ensure returned image is a PIL.Image.Image."""
 
38
  if isinstance(x, np.ndarray):
39
  if x.dtype != np.uint8:
40
  x = (x * 255.0).clip(0, 255).astype(np.uint8)
41
+ if x.ndim == 3 and x.shape[0] in (1, 3, 4): # CHW -> HWC
42
  x = np.moveaxis(x, 0, -1)
43
  return Image.fromarray(x)
44
  raise TypeError("Unsupported image type returned by pipeline.")
45
 
46
+
47
  @spaces.GPU(duration=300)
48
  def infer(
49
  prompt=None,
 
51
  width=512,
52
  height=512,
53
  num_inference_steps=28,
54
+ cfg=DEFAULT_CFG,
55
  positive_prompt=DEFAULT_POSITIVE_PROMPT,
56
  negative_prompt=DEFAULT_NEGATIVE_PROMPT,
57
  progress=gr.Progress(track_tqdm=True),
 
68
  num_images_per_caption=1,
69
  positive_prompt=positive_prompt,
70
  negative_prompt=negative_prompt,
71
+ cfg=float(cfg),
72
  cfg_img=1.0,
73
  cfg_schedule="constant",
74
  use_norm=False,
 
80
 
81
  return _ensure_pil(imgs[0]) # Return raw output exactly as generated
82
 
83
+
84
  css = """
85
  #col-container {
86
  margin: 0 auto;
 
90
 
91
  with gr.Blocks(css=css) as demo:
92
  with gr.Column(elem_id="col-container"):
93
+ gr.Markdown("# NextStep-1-Large — Image generation")
94
 
95
  with gr.Row():
96
  prompt = gr.Text(
 
149
  step=64,
150
  value=512,
151
  )
152
+ cfg = gr.Slider(
153
+ label="CFG (guidance scale)",
154
+ minimum=0.0,
155
+ maximum=20.0,
156
+ step=0.5,
157
+ value=DEFAULT_CFG,
158
+ info="Higher = closer to text, lower = more creative",
159
+ )
160
 
161
  with gr.Row():
162
  result_1 = gr.Image(
 
167
  format="png",
168
  )
169
 
 
170
  examples = [
171
  [
172
+ "Studio portrait of an elderly sailor with a weathered face, dramatic Rembrandt lighting, shallow depth of field",
173
+ 101, 512, 512, 32, 7.5,
174
+ "photorealistic, sharp eyes, detailed skin texture, soft rim light, 85mm lens",
175
+ "over-smoothed skin, plastic look, extra limbs, watermark",
176
+ ],
177
+ [
178
+ "Isometric cozy coffee shop interior with hanging plants and warm Edison bulbs",
179
+ 202, 512, 384, 30, 8.5,
180
+ "isometric view, clean lines, stylized, warm ambience, detailed furniture",
181
+ "text, logo, watermark, perspective distortion",
182
+ ],
183
+ [
184
+ "Ultra-wide desert canyon at golden hour with long shadows and dust in the air",
185
+ 303, 512, 320, 28, 7.0,
186
+ "cinematic, volumetric light, natural colors, high dynamic range",
187
+ "over-saturated, haze artifacts, blown highlights",
188
+ ],
189
+ [
190
+ "Cute red panda astronaut sticker, chibi style, white background",
191
+ 404, 384, 384, 24, 9.0,
192
+ "vector look, bold outlines, high contrast, die-cut silhouette",
193
+ "background clutter, drop shadow, gradients, text",
194
+ ],
195
+ [
196
+ "Product render of matte-black wireless headphones on reflective glass with soft studio lighting",
197
+ 505, 512, 384, 28, 7.0,
198
+ "clean backdrop, realistic reflections, subtle bloom, high detail",
199
+ "noise, fingerprints, text, label",
200
+ ],
201
+ [
202
+ "Graphic poster in Bauhaus style with geometric shapes and bold typography placeholders",
203
+ 606, 512, 512, 22, 6.0,
204
+ "flat colors, minimal palette, crisp edges, balanced composition",
205
+ "photo realism, gradients, noisy texture",
206
+ ],
207
+ [
208
+ "Oil painting of a stormy sea with a lighthouse, thick impasto brushwork",
209
+ 707, 384, 512, 34, 7.0,
210
+ "textured canvas, visible brush strokes, dramatic sky, moody lighting",
211
+ "smooth digital look, airbrush, neon colors",
212
+ ],
213
+ [
214
+ "Architectural concept art: glass pavilion in a pine forest at dawn, ground fog",
215
+ 808, 512, 384, 30, 8.0,
216
+ "physically-based rendering, soft fog, realistic materials, scale figures",
217
+ "tilt, skew, warped geometry, chromatic aberration",
218
  ],
219
  [
220
+ "Fantasy creature: bioluminescent jellyfish dragon swimming through a dark ocean trench",
221
+ 909, 512, 512, 32, 8.5,
222
+ "glowing tendrils, soft caustics, particles, high detail",
223
+ "washed out, murky, low contrast, extra heads",
224
  ],
225
  [
226
+ "Line art coloring page of a city skyline with hot air balloons",
227
+ 111, 512, 512, 18, 5.5,
228
+ "clean black outlines, uniform stroke weight, high contrast, no shading",
229
+ "gray fill, gradients, cross-hatching, text",
230
  ],
231
  ]
232
 
 
238
  width,
239
  height,
240
  num_inference_steps,
241
+ cfg,
242
  positive_prompt,
243
  negative_prompt,
244
  ],
 
257
  width,
258
  height,
259
  num_inference_steps,
260
+ cfg,
261
  positive_prompt,
262
  negative_prompt,
263
  ],