prithivMLmods committed on
Commit
6b8f8c9
·
verified ·
1 Parent(s): 7af58fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -15
app.py CHANGED
@@ -7,11 +7,7 @@ from diffusers import DiffusionPipeline
7
  import random
8
  import numpy as np
9
  import os
10
- import subprocess
11
  from qwen_vl_utils import process_vision_info
12
- from threading import Thread
13
- import uuid
14
- import io
15
 
16
  # Initialize models
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -36,10 +32,10 @@ enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchan
36
  MAX_SEED = np.iinfo(np.int32).max
37
  MAX_IMAGE_SIZE = 1024 # Reduced to prevent memory issues
38
 
39
- # Qwen2VL caption function
40
  @spaces.GPU
41
  def qwen_caption(image):
42
- # Convert image to PIL if it's not already
43
  if not isinstance(image, Image.Image):
44
  image = Image.fromarray(image)
45
 
@@ -48,7 +44,8 @@ def qwen_caption(image):
48
  "role": "user",
49
  "content": [
50
  {"type": "image", "image": image},
51
- {"type": "text", "text": "Generate a detailed and optimized caption for the given image in the form of JSON data {}"},
 
52
  ],
53
  }
54
  ]
@@ -77,7 +74,7 @@ def qwen_caption(image):
77
 
78
  return output_text
79
 
80
- # Prompt Enhancer function
81
  def enhance_prompt(input_prompt):
82
  result = enhancer_long("Enhance the description: " + input_prompt)
83
  enhanced_text = result[0]['summary_text']
@@ -86,10 +83,8 @@ def enhance_prompt(input_prompt):
86
  @spaces.GPU(duration=190)
87
  def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
88
  if image is not None:
89
- # Convert image to PIL if it's not already
90
  if not isinstance(image, Image.Image):
91
  image = Image.fromarray(image)
92
-
93
  prompt = qwen_caption(image)
94
  print(prompt)
95
  else:
@@ -103,10 +98,9 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
103
 
104
  generator = torch.Generator(device=device).manual_seed(seed)
105
 
106
- # Reduce memory usage by clearing GPU cache
107
  torch.cuda.empty_cache()
108
 
109
- # Generate image with FLUX.1-dev
110
  try:
111
  image = pipe(
112
  prompt=prompt,
@@ -164,10 +158,10 @@ with gr.Blocks(css=custom_css) as demo:
164
  use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
165
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
166
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
167
- width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512) # Reduced default width
168
- height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512) # Reduced default height
169
  guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
170
- num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20) # Reduced default steps
171
 
172
  generate_btn = gr.Button("Generate Image + Prompt Enhanced", elem_classes="submit-btn")
173
 
 
7
  import random
8
  import numpy as np
9
  import os
 
10
  from qwen_vl_utils import process_vision_info
 
 
 
11
 
12
  # Initialize models
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
32
  MAX_SEED = np.iinfo(np.int32).max
33
  MAX_IMAGE_SIZE = 1024 # Reduced to prevent memory issues
34
 
35
+ # Qwen2VL caption function – updated to request plain text caption instead of JSON
36
  @spaces.GPU
37
  def qwen_caption(image):
38
+ # Convert image to PIL if needed
39
  if not isinstance(image, Image.Image):
40
  image = Image.fromarray(image)
41
 
 
44
  "role": "user",
45
  "content": [
46
  {"type": "image", "image": image},
47
+ # Removed "in the form of JSON data {}" to get plain text caption
48
+ {"type": "text", "text": "Generate a detailed and optimized caption for the given image."},
49
  ],
50
  }
51
  ]
 
74
 
75
  return output_text
76
 
77
+ # Prompt Enhancer function (unchanged)
78
  def enhance_prompt(input_prompt):
79
  result = enhancer_long("Enhance the description: " + input_prompt)
80
  enhanced_text = result[0]['summary_text']
 
83
  @spaces.GPU(duration=190)
84
  def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
85
  if image is not None:
 
86
  if not isinstance(image, Image.Image):
87
  image = Image.fromarray(image)
 
88
  prompt = qwen_caption(image)
89
  print(prompt)
90
  else:
 
98
 
99
  generator = torch.Generator(device=device).manual_seed(seed)
100
 
101
+ # Clear GPU cache before generating the image
102
  torch.cuda.empty_cache()
103
 
 
104
  try:
105
  image = pipe(
106
  prompt=prompt,
 
158
  use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
159
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
160
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
161
+ width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
162
+ height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
163
  guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
164
+ num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20)
165
 
166
  generate_btn = gr.Button("Generate Image + Prompt Enhanced", elem_classes="submit-btn")
167