Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,103 +2,51 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import os
|
4 |
import glob
|
5 |
-
import numpy as np
|
6 |
from datetime import datetime
|
7 |
from PIL import Image
|
8 |
from diffusers.utils import load_image
|
9 |
from diffusers import EulerDiscreteScheduler
|
10 |
from pipline_StableDiffusion_ConsistentID import ConsistentIDStableDiffusionPipeline
|
11 |
-
import
|
12 |
-
sys.path.append("./models/LLaVA")
|
13 |
-
from llava.model.builder import load_pretrained_model
|
14 |
-
from llava.mm_utils import get_model_name_from_path
|
15 |
-
from llava.eval.run_llava import eval_model
|
16 |
-
|
17 |
-
# Load LLaVA for prompt enhancement
|
18 |
-
llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
|
19 |
-
llva_tokenizer, llva_model, llva_image_processor, llva_context_len = load_pretrained_model(
|
20 |
-
model_path=llva_model_path,
|
21 |
-
model_base=None,
|
22 |
-
model_name=get_model_name_from_path(llva_model_path),)
|
23 |
-
|
24 |
-
|
25 |
-
@torch.inference_mode()
|
26 |
-
def Enhance_prompt(prompt,select_images):
|
27 |
-
|
28 |
-
llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
|
29 |
-
args = type('Args', (), {
|
30 |
-
"model_path": llva_model_path,
|
31 |
-
"model_base": None,
|
32 |
-
"model_name": get_model_name_from_path(llva_model_path),
|
33 |
-
"query": llva_prompt,
|
34 |
-
"conv_mode": None,
|
35 |
-
"image_file": select_images,
|
36 |
-
"sep": ",",
|
37 |
-
"temperature": 0,
|
38 |
-
"top_p": None,
|
39 |
-
"num_beams": 1,
|
40 |
-
"max_new_tokens": 512
|
41 |
-
})()
|
42 |
-
Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
|
43 |
-
|
44 |
-
return Enhanced_prompt
|
45 |
-
|
46 |
-
# print(gr.__version__)
|
47 |
-
# 4.16.0
|
48 |
|
|
|
|
|
49 |
|
|
|
50 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|
51 |
-
device = "cuda"
|
52 |
-
# TODO
|
53 |
-
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE" # TODO
|
54 |
-
consistentID_path = "JackAILab/ConsistentID" # TODO
|
55 |
-
|
56 |
-
### Load base model
|
57 |
-
pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
|
58 |
-
base_model_path,
|
59 |
-
torch_dtype=torch.float16,
|
60 |
-
use_safetensors=True,
|
61 |
-
variant="fp16"
|
62 |
-
).to(device)
|
63 |
-
|
64 |
-
### Load consistentID_model checkpoint
|
65 |
-
pipe.load_ConsistentID_model(
|
66 |
-
os.path.dirname(consistentID_path),
|
67 |
-
subfolder="",
|
68 |
-
weight_name=os.path.basename(consistentID_path),
|
69 |
-
trigger_word="img",
|
70 |
-
)
|
71 |
-
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
72 |
-
|
73 |
-
def process(selected_template_images,costum_image,prompt
|
74 |
-
,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps):
|
75 |
-
|
76 |
-
if model_selected_tab==0:
|
77 |
-
select_images = load_image(Image.open(selected_template_images))
|
78 |
-
else:
|
79 |
-
select_images = load_image(Image.fromarray(costum_image))
|
80 |
-
|
81 |
-
if prompt_selected_tab==0:
|
82 |
-
prompt = prompt_selected
|
83 |
-
negative_prompt = ""
|
84 |
-
need_safetycheck = False
|
85 |
-
else:
|
86 |
-
need_safetycheck = True
|
87 |
-
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# hyper-parameter
|
|
|
90 |
num_steps = 50
|
91 |
-
|
92 |
|
93 |
|
94 |
if prompt == "":
|
95 |
-
prompt = "A man, in a forest"
|
96 |
-
prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
|
97 |
-
prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
|
98 |
-
else:
|
99 |
-
prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
|
100 |
-
print(prompt)
|
101 |
-
pass
|
102 |
|
103 |
if negative_prompt == "":
|
104 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
|
@@ -106,7 +54,7 @@ def process(selected_template_images,costum_image,prompt
|
|
106 |
#Extend Prompt
|
107 |
prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
|
108 |
|
109 |
-
negtive_prompt_group="((
|
110 |
negative_prompt = negative_prompt + negtive_prompt_group
|
111 |
|
112 |
seed = torch.randint(0, 1000, (1,)).item()
|
@@ -114,90 +62,47 @@ def process(selected_template_images,costum_image,prompt
|
|
114 |
|
115 |
images = pipe(
|
116 |
prompt=prompt,
|
117 |
-
width=
|
118 |
-
height=
|
119 |
input_id_images=select_images,
|
120 |
negative_prompt=negative_prompt,
|
121 |
num_images_per_prompt=1,
|
122 |
num_inference_steps=num_steps,
|
123 |
start_merge_step=merge_steps,
|
124 |
generator=generator,
|
125 |
-
retouching=retouching,
|
126 |
-
need_safetycheck=need_safetycheck,
|
127 |
).images[0]
|
128 |
|
129 |
current_date = datetime.today()
|
130 |
-
return np.array(images)
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
with gr.Column():
|
165 |
-
prompt_selected_tab = gr.State(0)
|
166 |
-
with gr.TabItem("template prompts") as template_prompts_tab:
|
167 |
-
prompt_selected = gr.Dropdown(value="A person, police officer, half body shot", elem_id='dropdown', choices=[
|
168 |
-
"A woman in a wedding dress",
|
169 |
-
"A woman, queen, in a gorgeous palace",
|
170 |
-
"A man sitting at the beach with sunset",
|
171 |
-
"A person, police officer, half body shot",
|
172 |
-
"A man, sailor, in a boat above ocean",
|
173 |
-
"A women wearing headphone, listening music",
|
174 |
-
"A man, firefighter, half body shot"], label=f"prepared prompts")
|
175 |
-
|
176 |
-
with gr.TabItem("custom prompt") as custom_prompt_tab:
|
177 |
-
prompt = gr.Textbox(label="prompt",placeholder="A man/woman wearing a santa hat")
|
178 |
-
nagetive_prompt = gr.Textbox(label="negative prompt",placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry")
|
179 |
-
|
180 |
-
prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
|
181 |
-
for i, tab in enumerate(prompt_selected_tabs):
|
182 |
-
tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[prompt_selected_tab])
|
183 |
-
|
184 |
-
retouching = gr.Checkbox(label="face retouching",value=False)
|
185 |
-
width = gr.Slider(label="image width",minimum=256,maximum=768,value=512,step=8)
|
186 |
-
height = gr.Slider(label="image height",minimum=256,maximum=768,value=768,step=8)
|
187 |
-
width.release(lambda x,y: min(1280-x,y), inputs=[width,height], outputs=[height])
|
188 |
-
height.release(lambda x,y: min(1280-y,x), inputs=[width,height], outputs=[width])
|
189 |
-
merge_steps = gr.Slider(label="step starting to merge facial details(30 is recommended)",minimum=10,maximum=50,value=30,step=1)
|
190 |
-
|
191 |
-
btn = gr.Button("Run")
|
192 |
-
with gr.Column():
|
193 |
-
out = gr.Image(label="Output")
|
194 |
-
gr.Markdown('''
|
195 |
-
N.B.:<br/>
|
196 |
-
- If the proportion of face in the image is too small, the probability of an error will be slightly higher, and the similarity will also significantly decrease.)
|
197 |
-
- At the same time, use prompt with \"man\" or \"woman\" instead of \"person\" as much as possible, as that may cause the model to be confused whether the protagonist is male or female.
|
198 |
-
- Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
|
199 |
-
''')
|
200 |
-
btn.click(fn=process, inputs=[selected_template_images,costum_image,prompt,nagetive_prompt,prompt_selected,retouching
|
201 |
-
,model_selected_tab,prompt_selected_tab,width,height,merge_steps], outputs=out)
|
202 |
-
|
203 |
-
demo.launch()
|
|
|
2 |
import torch
|
3 |
import os
|
4 |
import glob
|
|
|
5 |
from datetime import datetime
|
6 |
from PIL import Image
|
7 |
from diffusers.utils import load_image
|
8 |
from diffusers import EulerDiscreteScheduler
|
9 |
from pipline_StableDiffusion_ConsistentID import ConsistentIDStableDiffusionPipeline
|
10 |
+
import spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
zero = torch.Tensor([0]).cuda()
|
13 |
+
print(zero.device) # <-- 'cpu' 🤔
|
14 |
|
15 |
+
# Absolute path of the directory containing the current script
|
16 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
@spaces.GPU
|
19 |
+
def process(inputImage,prompt,negative_prompt):
|
20 |
+
|
21 |
+
device = zero.device # "cuda"
|
22 |
+
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
|
23 |
+
consistentID_path = "JackAILab/ConsistentID/ConsistentID-v1.bin"
|
24 |
+
|
25 |
+
### Load base model
|
26 |
+
pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
|
27 |
+
base_model_path,
|
28 |
+
torch_dtype=torch.float16,
|
29 |
+
use_safetensors=True,
|
30 |
+
variant="fp16"
|
31 |
+
).to(device)
|
32 |
+
|
33 |
+
### Load consistentID_model checkpoint
|
34 |
+
pipe.load_ConsistentID_model(
|
35 |
+
os.path.dirname(consistentID_path),
|
36 |
+
subfolder="",
|
37 |
+
weight_name=os.path.basename(consistentID_path),
|
38 |
+
trigger_word="img",
|
39 |
+
)
|
40 |
+
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
41 |
+
|
42 |
# hyper-parameter
|
43 |
+
select_images = load_image(Image.fromarray(inputImage))
|
44 |
num_steps = 50
|
45 |
+
merge_steps = 30
|
46 |
|
47 |
|
48 |
if prompt == "":
|
49 |
+
prompt = "A man, in a forest, adventuring"
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
if negative_prompt == "":
|
52 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
|
|
|
54 |
#Extend Prompt
|
55 |
prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
|
56 |
|
57 |
+
negtive_prompt_group="((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
|
58 |
negative_prompt = negative_prompt + negtive_prompt_group
|
59 |
|
60 |
seed = torch.randint(0, 1000, (1,)).item()
|
|
|
62 |
|
63 |
images = pipe(
|
64 |
prompt=prompt,
|
65 |
+
width=512,
|
66 |
+
height=512,
|
67 |
input_id_images=select_images,
|
68 |
negative_prompt=negative_prompt,
|
69 |
num_images_per_prompt=1,
|
70 |
num_inference_steps=num_steps,
|
71 |
start_merge_step=merge_steps,
|
72 |
generator=generator,
|
|
|
|
|
73 |
).images[0]
|
74 |
|
75 |
current_date = datetime.today()
|
|
|
76 |
|
77 |
+
output_dir = script_directory + f"/images/gradio_outputs"
|
78 |
+
if not os.path.exists(output_dir):
|
79 |
+
os.makedirs(output_dir)
|
80 |
+
|
81 |
+
images.save(os.path.join(output_dir, f"{current_date}-{seed}.jpg"))
|
82 |
+
|
83 |
+
return os.path.join(output_dir, f"{current_date}-{seed}.jpg")
|
84 |
+
|
85 |
+
|
86 |
+
iface = gr.Interface(
|
87 |
+
fn=process,
|
88 |
+
inputs=[
|
89 |
+
gr.Image(label="Upload Image"),
|
90 |
+
gr.Textbox(label="prompt",placeholder="A man, in a forest, adventuring"),
|
91 |
+
gr.Textbox(label="negative prompt",placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry"),
|
92 |
+
],
|
93 |
+
outputs=[
|
94 |
+
gr.Image(label="Output"),
|
95 |
+
],
|
96 |
+
title="ConsistentID Demo",
|
97 |
+
description="Put reference portrait below"
|
98 |
+
)
|
99 |
+
|
100 |
+
iface.launch() # zero.device
|
101 |
+
|
102 |
+
# @spaces.GPU
|
103 |
+
# def greet(n):
|
104 |
+
# print(zero.device) # <-- 'cuda:0' 🤗
|
105 |
+
# return f"Hello {zero + n} Tensor"
|
106 |
+
|
107 |
+
# demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
|
108 |
+
# demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|