Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -7,11 +7,7 @@ from diffusers import DiffusionPipeline
|
|
7 |
import random
|
8 |
import numpy as np
|
9 |
import os
|
10 |
-
import subprocess
|
11 |
from qwen_vl_utils import process_vision_info
|
12 |
-
from threading import Thread
|
13 |
-
import uuid
|
14 |
-
import io
|
15 |
|
16 |
# Initialize models
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -36,10 +32,10 @@ enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchan
|
|
36 |
MAX_SEED = np.iinfo(np.int32).max
|
37 |
MAX_IMAGE_SIZE = 1024 # Reduced to prevent memory issues
|
38 |
|
39 |
-
# Qwen2VL caption function
|
40 |
@spaces.GPU
|
41 |
def qwen_caption(image):
|
42 |
-
# Convert image to PIL if it's not already
|
43 |
if not isinstance(image, Image.Image):
|
44 |
image = Image.fromarray(image)
|
45 |
|
@@ -48,7 +44,8 @@ def qwen_caption(image):
|
|
48 |
"role": "user",
|
49 |
"content": [
|
50 |
{"type": "image", "image": image},
|
51 |
-
|
|
|
52 |
],
|
53 |
}
|
54 |
]
|
@@ -77,7 +74,7 @@ def qwen_caption(image):
|
|
77 |
|
78 |
return output_text
|
79 |
|
80 |
-
# Prompt Enhancer function
|
81 |
def enhance_prompt(input_prompt):
|
82 |
result = enhancer_long("Enhance the description: " + input_prompt)
|
83 |
enhanced_text = result[0]['summary_text']
|
@@ -86,10 +83,8 @@ def enhance_prompt(input_prompt):
|
|
86 |
@spaces.GPU(duration=190)
|
87 |
def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
|
88 |
if image is not None:
|
89 |
-
# Convert image to PIL if it's not already
|
90 |
if not isinstance(image, Image.Image):
|
91 |
image = Image.fromarray(image)
|
92 |
-
|
93 |
prompt = qwen_caption(image)
|
94 |
print(prompt)
|
95 |
else:
|
@@ -103,10 +98,9 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
|
|
103 |
|
104 |
generator = torch.Generator(device=device).manual_seed(seed)
|
105 |
|
106 |
-
#
|
107 |
torch.cuda.empty_cache()
|
108 |
|
109 |
-
# Generate image with FLUX.1-dev
|
110 |
try:
|
111 |
image = pipe(
|
112 |
prompt=prompt,
|
@@ -164,10 +158,10 @@ with gr.Blocks(css=custom_css) as demo:
|
|
164 |
use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
|
165 |
seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
|
166 |
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
|
167 |
-
width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
|
168 |
-
height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
|
169 |
guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
|
170 |
-
num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20)
|
171 |
|
172 |
generate_btn = gr.Button("Generate Image + Prompt Enhanced", elem_classes="submit-btn")
|
173 |
|
|
|
7 |
import random
|
8 |
import numpy as np
|
9 |
import os
|
|
|
10 |
from qwen_vl_utils import process_vision_info
|
|
|
|
|
|
|
11 |
|
12 |
# Initialize models
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
32 |
MAX_SEED = np.iinfo(np.int32).max
|
33 |
MAX_IMAGE_SIZE = 1024 # Reduced to prevent memory issues
|
34 |
|
35 |
+
# Qwen2VL caption function – updated to request plain text caption instead of JSON
|
36 |
@spaces.GPU
|
37 |
def qwen_caption(image):
|
38 |
+
# Convert image to PIL if needed
|
39 |
if not isinstance(image, Image.Image):
|
40 |
image = Image.fromarray(image)
|
41 |
|
|
|
44 |
"role": "user",
|
45 |
"content": [
|
46 |
{"type": "image", "image": image},
|
47 |
+
# Removed "in the form of JSON data {}" to get plain text caption
|
48 |
+
{"type": "text", "text": "Generate a detailed and optimized caption for the given image."},
|
49 |
],
|
50 |
}
|
51 |
]
|
|
|
74 |
|
75 |
return output_text
|
76 |
|
77 |
+
# Prompt Enhancer function (unchanged)
|
78 |
def enhance_prompt(input_prompt):
|
79 |
result = enhancer_long("Enhance the description: " + input_prompt)
|
80 |
enhanced_text = result[0]['summary_text']
|
|
|
83 |
@spaces.GPU(duration=190)
|
84 |
def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
|
85 |
if image is not None:
|
|
|
86 |
if not isinstance(image, Image.Image):
|
87 |
image = Image.fromarray(image)
|
|
|
88 |
prompt = qwen_caption(image)
|
89 |
print(prompt)
|
90 |
else:
|
|
|
98 |
|
99 |
generator = torch.Generator(device=device).manual_seed(seed)
|
100 |
|
101 |
+
# Clear GPU cache before generating the image
|
102 |
torch.cuda.empty_cache()
|
103 |
|
|
|
104 |
try:
|
105 |
image = pipe(
|
106 |
prompt=prompt,
|
|
|
158 |
use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
|
159 |
seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
|
160 |
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
|
161 |
+
width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
|
162 |
+
height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
|
163 |
guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
|
164 |
+
num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20)
|
165 |
|
166 |
generate_btn = gr.Button("Generate Image + Prompt Enhanced", elem_classes="submit-btn")
|
167 |
|