JackAILab committed on
Commit
7bdb7e9
·
verified ·
1 Parent(s): 621a27c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -161
app.py CHANGED
@@ -2,103 +2,51 @@ import gradio as gr
2
  import torch
3
  import os
4
  import glob
5
- import numpy as np
6
  from datetime import datetime
7
  from PIL import Image
8
  from diffusers.utils import load_image
9
  from diffusers import EulerDiscreteScheduler
10
  from pipline_StableDiffusion_ConsistentID import ConsistentIDStableDiffusionPipeline
11
- import sys
12
- sys.path.append("./models/LLaVA")
13
- from llava.model.builder import load_pretrained_model
14
- from llava.mm_utils import get_model_name_from_path
15
- from llava.eval.run_llava import eval_model
16
-
17
- # Load Lava for prompt enhancement
18
- llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
19
- llva_tokenizer, llva_model, llva_image_processor, llva_context_len = load_pretrained_model(
20
- model_path=llva_model_path,
21
- model_base=None,
22
- model_name=get_model_name_from_path(llva_model_path),)
23
-
24
-
25
- @torch.inference_mode()
26
- def Enhance_prompt(prompt,select_images):
27
-
28
- llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
29
- args = type('Args', (), {
30
- "model_path": llva_model_path,
31
- "model_base": None,
32
- "model_name": get_model_name_from_path(llva_model_path),
33
- "query": llva_prompt,
34
- "conv_mode": None,
35
- "image_file": select_images,
36
- "sep": ",",
37
- "temperature": 0,
38
- "top_p": None,
39
- "num_beams": 1,
40
- "max_new_tokens": 512
41
- })()
42
- Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
43
-
44
- return Enhanced_prompt
45
-
46
- # print(gr.__version__)
47
- # 4.16.0
48
 
 
 
49
 
 
50
  script_directory = os.path.dirname(os.path.realpath(__file__))
51
- device = "cuda"
52
- # TODO
53
- base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE" # TODO
54
- consistentID_path = "JackAILab/ConsistentID" # TODO
55
-
56
- ### Load base model
57
- pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
58
- base_model_path,
59
- torch_dtype=torch.float16,
60
- use_safetensors=True,
61
- variant="fp16"
62
- ).to(device)
63
-
64
- ### Load consistentID_model checkpoint
65
- pipe.load_ConsistentID_model(
66
- os.path.dirname(consistentID_path),
67
- subfolder="",
68
- weight_name=os.path.basename(consistentID_path),
69
- trigger_word="img",
70
- )
71
- pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
72
-
73
- def process(selected_template_images,costum_image,prompt
74
- ,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps):
75
-
76
- if model_selected_tab==0:
77
- select_images = load_image(Image.open(selected_template_images))
78
- else:
79
- select_images = load_image(Image.fromarray(costum_image))
80
-
81
- if prompt_selected_tab==0:
82
- prompt = prompt_selected
83
- negative_prompt = ""
84
- need_safetycheck = False
85
- else:
86
- need_safetycheck = True
87
-
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # hyper-parameter
 
90
  num_steps = 50
91
- # merge_steps = 30
92
 
93
 
94
  if prompt == "":
95
- prompt = "A man, in a forest"
96
- prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
97
- prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
98
- else:
99
- prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
100
- print(prompt)
101
- pass
102
 
103
  if negative_prompt == "":
104
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
@@ -106,7 +54,7 @@ def process(selected_template_images,costum_image,prompt
106
  #Extend Prompt
107
  prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
108
 
109
- negtive_prompt_group="((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
110
  negative_prompt = negative_prompt + negtive_prompt_group
111
 
112
  seed = torch.randint(0, 1000, (1,)).item()
@@ -114,90 +62,47 @@ def process(selected_template_images,costum_image,prompt
114
 
115
  images = pipe(
116
  prompt=prompt,
117
- width=width,
118
- height=height,
119
  input_id_images=select_images,
120
  negative_prompt=negative_prompt,
121
  num_images_per_prompt=1,
122
  num_inference_steps=num_steps,
123
  start_merge_step=merge_steps,
124
  generator=generator,
125
- retouching=retouching,
126
- need_safetycheck=need_safetycheck,
127
  ).images[0]
128
 
129
  current_date = datetime.today()
130
- return np.array(images)
131
 
132
- # Gets the templates
133
- script_directory = os.path.dirname(os.path.realpath(__file__))
134
- preset_template = glob.glob("./images/templates/*.png")
135
- preset_template = preset_template + glob.glob("./images/templates/*.jpg")
136
-
137
-
138
- with gr.Blocks(title="ConsistentID Demo") as demo:
139
- gr.Markdown("# ConsistentID Demo")
140
- gr.Markdown("\
141
- Put the reference figure to be redrawn into the box below (There is a small probability of referensing failure. You can submit it repeatedly)")
142
- gr.Markdown("\
143
- If you find our work interesting, please leave a star in GitHub for us!<br>\
144
- https://github.com/JackAILab/ConsistentID")
145
- with gr.Row():
146
- with gr.Column():
147
- model_selected_tab = gr.State(0)
148
- with gr.TabItem("template images") as template_images_tab:
149
- template_gallery_list = [(i, i) for i in preset_template]
150
- gallery = gr.Gallery(template_gallery_list,columns=[4], rows=[2], object_fit="contain", height="auto",show_label=False)
151
-
152
- def select_function(evt: gr.SelectData):
153
- return preset_template[evt.index]
154
-
155
- selected_template_images = gr.Text(show_label=False, visible=False, placeholder="Selected")
156
- gallery.select(select_function, None, selected_template_images)
157
- with gr.TabItem("Upload Image") as upload_image_tab:
158
- costum_image = gr.Image(label="Upload Image")
159
-
160
- model_selected_tabs = [template_images_tab, upload_image_tab]
161
- for i, tab in enumerate(model_selected_tabs):
162
- tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[model_selected_tab])
163
-
164
- with gr.Column():
165
- prompt_selected_tab = gr.State(0)
166
- with gr.TabItem("template prompts") as template_prompts_tab:
167
- prompt_selected = gr.Dropdown(value="A person, police officer, half body shot", elem_id='dropdown', choices=[
168
- "A woman in a wedding dress",
169
- "A woman, queen, in a gorgeous palace",
170
- "A man sitting at the beach with sunset",
171
- "A person, police officer, half body shot",
172
- "A man, sailor, in a boat above ocean",
173
- "A women wearing headphone, listening music",
174
- "A man, firefighter, half body shot"], label=f"prepared prompts")
175
-
176
- with gr.TabItem("custom prompt") as custom_prompt_tab:
177
- prompt = gr.Textbox(label="prompt",placeholder="A man/woman wearing a santa hat")
178
- nagetive_prompt = gr.Textbox(label="negative prompt",placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry")
179
-
180
- prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
181
- for i, tab in enumerate(prompt_selected_tabs):
182
- tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[prompt_selected_tab])
183
-
184
- retouching = gr.Checkbox(label="face retouching",value=False)
185
- width = gr.Slider(label="image width",minimum=256,maximum=768,value=512,step=8)
186
- height = gr.Slider(label="image height",minimum=256,maximum=768,value=768,step=8)
187
- width.release(lambda x,y: min(1280-x,y), inputs=[width,height], outputs=[height])
188
- height.release(lambda x,y: min(1280-y,x), inputs=[width,height], outputs=[width])
189
- merge_steps = gr.Slider(label="step starting to merge facial details(30 is recommended)",minimum=10,maximum=50,value=30,step=1)
190
-
191
- btn = gr.Button("Run")
192
- with gr.Column():
193
- out = gr.Image(label="Output")
194
- gr.Markdown('''
195
- N.B.:<br/>
196
- - If the proportion of face in the image is too small, the probability of an error will be slightly higher, and the similarity will also significantly decrease.)
197
- - At the same time, use prompt with \"man\" or \"woman\" instead of \"person\" as much as possible, as that may cause the model to be confused whether the protagonist is male or female.
198
- - Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
199
- ''')
200
- btn.click(fn=process, inputs=[selected_template_images,costum_image,prompt,nagetive_prompt,prompt_selected,retouching
201
- ,model_selected_tab,prompt_selected_tab,width,height,merge_steps], outputs=out)
202
-
203
- demo.launch()
 
2
  import torch
3
  import os
4
  import glob
 
5
  from datetime import datetime
6
  from PIL import Image
7
  from diffusers.utils import load_image
8
  from diffusers import EulerDiscreteScheduler
9
  from pipline_StableDiffusion_ConsistentID import ConsistentIDStableDiffusionPipeline
10
+ import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ zero = torch.Tensor([0]).cuda()
13
+ print(zero.device) # <-- 'cpu' 🤔
14
 
15
+ # Gets the absolute path of the directory containing the current script
16
  script_directory = os.path.dirname(os.path.realpath(__file__))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ @spaces.GPU
19
+ def process(inputImage,prompt,negative_prompt):
20
+
21
+ device = zero.device # "cuda"
22
+ base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
23
+ consistentID_path = "JackAILab/ConsistentID/ConsistentID-v1.bin"
24
+
25
+ ### Load base model
26
+ pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
27
+ base_model_path,
28
+ torch_dtype=torch.float16,
29
+ use_safetensors=True,
30
+ variant="fp16"
31
+ ).to(device)
32
+
33
+ ### Load consistentID_model checkpoint
34
+ pipe.load_ConsistentID_model(
35
+ os.path.dirname(consistentID_path),
36
+ subfolder="",
37
+ weight_name=os.path.basename(consistentID_path),
38
+ trigger_word="img",
39
+ )
40
+ pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
41
+
42
  # hyper-parameter
43
+ select_images = load_image(Image.fromarray(inputImage))
44
  num_steps = 50
45
+ merge_steps = 30
46
 
47
 
48
  if prompt == "":
49
+ prompt = "A man, in a forest, adventuring"
 
 
 
 
 
 
50
 
51
  if negative_prompt == "":
52
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
 
54
  #Extend Prompt
55
  prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
56
 
57
+ negtive_prompt_group="((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
58
  negative_prompt = negative_prompt + negtive_prompt_group
59
 
60
  seed = torch.randint(0, 1000, (1,)).item()
 
62
 
63
  images = pipe(
64
  prompt=prompt,
65
+ width=512,
66
+ height=512,
67
  input_id_images=select_images,
68
  negative_prompt=negative_prompt,
69
  num_images_per_prompt=1,
70
  num_inference_steps=num_steps,
71
  start_merge_step=merge_steps,
72
  generator=generator,
 
 
73
  ).images[0]
74
 
75
  current_date = datetime.today()
 
76
 
77
+ output_dir = script_directory + f"/images/gradio_outputs"
78
+ if not os.path.exists(output_dir):
79
+ os.makedirs(output_dir)
80
+
81
+ images.save(os.path.join(output_dir, f"{current_date}-{seed}.jpg"))
82
+
83
+ return os.path.join(output_dir, f"{current_date}-{seed}.jpg")
84
+
85
+
86
+ iface = gr.Interface(
87
+ fn=process,
88
+ inputs=[
89
+ gr.Image(label="Upload Image"),
90
+ gr.Textbox(label="prompt",placeholder="A man, in a forest, adventuring"),
91
+ gr.Textbox(label="negative prompt",placeholder="monochrome, lowres, bad anatomy, worst quality, low quality, blurry"),
92
+ ],
93
+ outputs=[
94
+ gr.Image(label="Output"),
95
+ ],
96
+ title="ConsistentID Demo",
97
+ description="Put reference portrait below"
98
+ )
99
+
100
+ iface.launch() # zero.device
101
+
102
+ # @spaces.GPU
103
+ # def greet(n):
104
+ # print(zero.device) # <-- 'cuda:0' 🤗
105
+ # return f"Hello {zero + n} Tensor"
106
+
107
+ # demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
108
+ # demo.launch()