prithivMLmods committed on
Commit 1dcd95e · verified · 1 Parent(s): fe4abc1

Update app.py

Files changed (1):
  1. app.py +159 -205
app.py CHANGED
@@ -1,222 +1,176 @@
- import os
- import random
- import uuid
- import json
- import gradio as gr
- import numpy as np
- from PIL import Image
  import spaces
  import torch
- from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-
- DESCRIPTIONx = """## STABLE HAMSTER 🐹
-
- """
-
- css = '''
- .gradio-container{max-width: 560px !important}
- h1{text-align:center}
- footer {
-     visibility: hidden
- }
- '''
-
- examples = [
-     "3d image, cute girl, in the style of Pixar --ar 1:2 --stylize 750, 4K resolution highlights, Sharp focus, octane render, ray tracing, Ultra-High-Definition, 8k, UHD, HDR, (Masterpiece:1.5), (best quality:1.5)",
-     "Cold coffee in a cup bokeh --ar 85:128 --v 6.0 --style raw5, 4K",
-     "Vector illustration of a horse, vector graphic design with flat colors on an brown background in the style of vector art, using simple shapes and graphics with simple details, professionally designed as a tshirt logo ready for print on a white background. --ar 89:82 --v 6.0 --style raw",
-     "Man in brown leather jacket posing for camera, in the style of sleek and stylized, clockpunk, subtle shades, exacting precision, ferrania p30 --ar 67:101 --v 5",
-     "Commercial photography, giant burger, white lighting, studio light, 8k octane rendering, high resolution photography, insanely detailed, fine details, on white isolated plain, 8k, commercial photography, stock photo, professional color grading, --v 4 --ar 9:16 "
- ]
-
- MODEL_ID = os.getenv("MODEL_VAL_PATH")  # SDXL model repo ID, read from the MODEL_VAL_PATH environment variable
- MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
- USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
- ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
- BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # Allow generating multiple images at once
-
- # Load the model once at startup, outside the generate function
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- pipe = StableDiffusionXLPipeline.from_pretrained(
-     MODEL_ID,
-     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-     use_safetensors=True,
-     add_watermarker=False,
- ).to(device)
- pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
-
- # Optional torch.compile speedup
- if USE_TORCH_COMPILE:
-     pipe.compile()
-
- # Optional CPU offload to reduce GPU memory use
- if ENABLE_CPU_OFFLOAD:
-     pipe.enable_model_cpu_offload()
-
  MAX_SEED = np.iinfo(np.int32).max
-
- def save_image(img):
-     unique_name = str(uuid.uuid4()) + ".png"
-     img.save(unique_name)
-     return unique_name
-
- def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
-     return seed
-
- @spaces.GPU(duration=60, enable_queue=True)
- def generate(
-     prompt: str,
-     negative_prompt: str = "",
-     use_negative_prompt: bool = False,
-     seed: int = 1,
-     width: int = 1024,
-     height: int = 1024,
-     guidance_scale: float = 3,
-     num_inference_steps: int = 25,
-     randomize_seed: bool = False,
-     use_resolution_binning: bool = True,
-     num_images: int = 1,  # Number of images to generate
-     progress=gr.Progress(track_tqdm=True),
- ):
-     seed = int(randomize_seed_fn(seed, randomize_seed))
      generator = torch.Generator(device=device).manual_seed(seed)
-
-     # Generation options
-     options = {
-         "prompt": [prompt] * num_images,
-         "negative_prompt": [negative_prompt] * num_images if use_negative_prompt else None,
-         "width": width,
-         "height": height,
-         "guidance_scale": guidance_scale,
-         "num_inference_steps": num_inference_steps,
-         "generator": generator,
-         "output_type": "pil",
-     }
-
-     if use_resolution_binning:
-         options["use_resolution_binning"] = True
-
-     # Generate the requested images in batches of BATCH_SIZE
-     images = []
-     for i in range(0, num_images, BATCH_SIZE):
-         batch_options = options.copy()
-         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
-         if "negative_prompt" in batch_options:
-             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
-         images.extend(pipe(**batch_options).images)
-
-     image_paths = [save_image(img) for img in images]
-     return image_paths, seed
-
- with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
-     gr.Markdown(DESCRIPTIONx)
-     with gr.Group():
-         with gr.Row():
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )
-             run_button = gr.Button("Run", scale=0)
-         result = gr.Gallery(label="Result", columns=1, show_label=False)
-     with gr.Accordion("Advanced options", open=False, visible=False):
-         num_images = gr.Slider(
-             label="Number of Images",
-             minimum=1,
-             maximum=4,
-             step=1,
-             value=1,
-         )
-         with gr.Row():
-             use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
-             negative_prompt = gr.Text(
-                 label="Negative prompt",
-                 max_lines=5,
-                 lines=4,
-                 placeholder="Enter a negative prompt",
-                 value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
-                 visible=True,
-             )
-         seed = gr.Slider(
-             label="Seed",
-             minimum=0,
-             maximum=MAX_SEED,
-             step=1,
-             value=0,
-         )
-         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-         with gr.Row(visible=True):
-             width = gr.Slider(
-                 label="Width",
-                 minimum=512,
-                 maximum=MAX_IMAGE_SIZE,
-                 step=64,
-                 value=1024,
-             )
-             height = gr.Slider(
-                 label="Height",
-                 minimum=512,
-                 maximum=MAX_IMAGE_SIZE,
-                 step=64,
-                 value=1024,
-             )
-         with gr.Row():
-             guidance_scale = gr.Slider(
-                 label="Guidance Scale",
-                 minimum=0.1,
-                 maximum=6,
-                 step=0.1,
-                 value=3.0,
-             )
-             num_inference_steps = gr.Slider(
-                 label="Number of inference steps",
-                 minimum=1,
-                 maximum=25,
-                 step=1,
-                 value=23,
-             )
-
-     gr.Examples(
-         examples=examples,
-         inputs=prompt,
-         cache_examples=False
-     )
-
-     use_negative_prompt.change(
-         fn=lambda x: gr.update(visible=x),
-         inputs=use_negative_prompt,
-         outputs=negative_prompt,
-         api_name=False,
-     )
-
-     gr.on(
-         triggers=[
-             prompt.submit,
-             negative_prompt.submit,
-             run_button.click,
-         ],
-         fn=generate,
          inputs=[
-             prompt,
-             negative_prompt,
-             use_negative_prompt,
-             seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-             randomize_seed,
-             num_images
          ],
-         outputs=[result, seed],
-         api_name="stable_hamster",
-     )
-
- if __name__ == "__main__":
-     demo.queue(max_size=40).launch()
 
  import spaces
+ import gradio as gr
  import torch
+ from PIL import Image
+ from transformers import AutoProcessor, AutoModelForCausalLM, pipeline, Qwen2VLForConditionalGeneration
+ from diffusers import DiffusionPipeline
+ import random
+ import numpy as np
+ import os
+ import subprocess
+ from qwen_vl_utils import process_vision_info
+ from threading import Thread
+ import uuid
+ import io
+
+ # Initialize models
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ dtype = torch.bfloat16
+
+ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+
+ # FLUX.1-dev text-to-image pipeline
+ pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype, token=huggingface_token).to(device)
+
+ # Qwen2VL captioning model
+ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
+     "prithivMLmods/JSONify-Flux", trust_remote_code=True, torch_dtype=torch.float16
+ ).to(device).eval()
+ qwen_processor = AutoProcessor.from_pretrained("prithivMLmods/JSONify-Flux", trust_remote_code=True)
+
+ # Prompt enhancer (summarization pipeline used to expand short prompts)
+ enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance-Long", device=device)
+
  MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 2048
+
+ # Qwen2VL caption function
+ @spaces.GPU
+ def qwen_caption(image):
+     # Convert the input to a PIL image if it's not one already
+     if not isinstance(image, Image.Image):
+         image = Image.fromarray(image)
+
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image", "image": image},
+                 {"type": "text", "text": "Caption the image"},
+             ],
+         }
+     ]
+
+     text = qwen_processor.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+     image_inputs, video_inputs = process_vision_info(messages)
+     inputs = qwen_processor(
+         text=[text],
+         images=image_inputs,
+         videos=video_inputs,
+         padding=True,
+         return_tensors="pt",
+     ).to(device)
+
+     generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
+     generated_ids_trimmed = [
+         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+     ]
+     output_text = qwen_processor.batch_decode(
+         generated_ids_trimmed,
+         skip_special_tokens=True,
+         clean_up_tokenization_spaces=False,
+     )[0]
+
+     return output_text
+
+ # Prompt enhancer function
+ def enhance_prompt(input_prompt):
+     result = enhancer_long("Enhance the description: " + input_prompt)
+     enhanced_text = result[0]['summary_text']
+     return enhanced_text
+
+ @spaces.GPU(duration=190)
+ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
+     if image is not None:
+         # Convert the input to a PIL image if it's not one already
+         if not isinstance(image, Image.Image):
+             image = Image.fromarray(image)
+
+         # Caption the uploaded image and use the caption as the prompt
+         prompt = qwen_caption(image)
+         print(prompt)
+     else:
+         prompt = text_prompt
+
+     if use_enhancer:
+         prompt = enhance_prompt(prompt)
+
      if randomize_seed:
          seed = random.randint(0, MAX_SEED)
+
      generator = torch.Generator(device=device).manual_seed(seed)
+
+     image = pipe(
+         prompt=prompt,
+         generator=generator,
+         num_inference_steps=num_inference_steps,
+         width=width,
+         height=height,
+         guidance_scale=guidance_scale
+     ).images[0]
+
+     return image, prompt, seed
+
+ custom_css = """
+ .input-group, .output-group {
+     border: 1px solid #e0e0e0;
+     border-radius: 10px;
+     padding: 20px;
+     margin-bottom: 20px;
+     background-color: #f9f9f9;
+ }
+ .submit-btn {
+     background-color: #2980b9 !important;
+     color: white !important;
+ }
+ .submit-btn:hover {
+     background-color: #3498db !important;
+ }
+ """
+
+ title = """<h1 align="center">FLUX.1-dev with Qwen2VL Captioner and Prompt Enhancer</h1>
+ <p><center>
+ <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" target="_blank">[FLUX.1-dev Model]</a>
+ <a href="https://huggingface.co/prithivMLmods/JSONify-Flux" target="_blank">[Qwen2VL Model]</a>
+ <a href="https://huggingface.co/gokaygokay/Lamini-Prompt-Enchance-Long" target="_blank">[Prompt Enhancer Long]</a>
+ <p align="center">Create long prompts from images, or enhance your short prompts with the prompt enhancer</p>
+ </center></p>
+ """
+
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray")) as demo:
+     gr.HTML(title)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             with gr.Group(elem_classes="input-group"):
+                 input_image = gr.Image(label="Input Image (Qwen2VL Captioner)")
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 text_prompt = gr.Textbox(label="Text Prompt (optional, used if no image is uploaded)")
+                 use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
+                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+                 randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                 width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+                 height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+                 guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
+                 num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=28)
+
+             generate_btn = gr.Button("Generate Image", elem_classes="submit-btn")
+
+         with gr.Column(scale=1):
+             with gr.Group(elem_classes="output-group"):
+                 output_image = gr.Image(label="Result", elem_id="gallery", show_label=False)
+                 final_prompt = gr.Textbox(label="Final Prompt Used")
+                 used_seed = gr.Number(label="Seed Used")
+
+     generate_btn.click(
+         fn=process_workflow,
          inputs=[
+             input_image, text_prompt, use_enhancer, seed, randomize_seed,
+             width, height, guidance_scale, num_inference_steps
          ],
+         outputs=[output_image, final_prompt, used_seed]
+     )
+
+ demo.launch(debug=True)
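
Note: a minimal usage sketch, not part of the commit, showing how the new caption -> enhance -> generate chain could be exercised without the Gradio UI. It assumes the definitions above (qwen_caption, enhance_prompt, pipe, device) are already loaded in the current session with the demo.launch call skipped; the input file name and the fixed seed below are illustrative placeholders.

import torch
from PIL import Image

img = Image.open("example.jpg")      # placeholder input image
prompt = qwen_caption(img)           # Qwen2VL caption becomes the base prompt
prompt = enhance_prompt(prompt)      # optionally expand it into a long prompt
generator = torch.Generator(device=device).manual_seed(42)  # placeholder fixed seed
image = pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=28,          # values mirror the UI slider defaults
    width=1024,
    height=1024,
    guidance_scale=3.5,
).images[0]
image.save("flux_output.png")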