File size: 15,467 Bytes
6359a1f 4ddbeae 6359a1f 9ce5766 2f3d2b6 cac4e15 9ce5766 4ddbeae 9ce5766 156ff8a 9ce5766 4ddbeae 9fa45e4 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae cac4e15 4ddbeae 9fa45e4 cac4e15 4ddbeae 6359a1f 4ddbeae 14ae48c 6359a1f 9d3dabd 152c7d3 6359a1f 9ce5766 e1d35f0 6359a1f 152c7d3 6359a1f 152c7d3 6359a1f e1d35f0 152c7d3 9ce5766 6359a1f 9ce5766 6f508ed 9a92a7a 0c3dac8 c4c86e6 6359a1f 14ae48c 6359a1f cac4e15 b50b7da 6359a1f cac4e15 6359a1f 9ce5766 a057ae6 9ce5766 47a3780 9ce5766 4ddbeae bb2709f 4ddbeae bb2709f 9d3dabd bb2709f 9d3dabd f951c72 9d3dabd 9ce5766 e1d35f0 9ce5766 6359a1f 9ce5766 6359a1f 9ce5766 6359a1f 9d3dabd e1d35f0 9ce5766 6359a1f 9ce5766 6359a1f 9d3dabd 6359a1f 9ce5766 9d3dabd 9ce5766 6908ed7 9ce5766 6359a1f 9ce5766 6359a1f 9ce5766 47a3780 9ce5766 14ae48c c44860a 14ae48c d33d8e6 14ae48c 9d3dabd 9ce5766 6359a1f 9d3dabd 6359a1f 9ce5766 5bf3f79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 |
import os
import re
import threading
import time
from datetime import datetime, timedelta
import gradio as gr
import random
import spaces
from diffusers import CogView4Pipeline
import torch
from openai import OpenAI
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CogView4-6B pipeline in bfloat16 and move it to the selected device.
pipe = CogView4Pipeline.from_pretrained(
    "THUDM/CogView4-6B",
    torch_dtype=torch.bfloat16,
)
pipe = pipe.to(device)
def clean_string(s):
    """Flatten *s* onto a single line.

    Newlines become spaces, leading/trailing whitespace is trimmed, and every
    run of two or more whitespace characters collapses to one space. A lone
    whitespace character (e.g. a single tab) is left as it is.
    """
    flattened = s.replace("\n", " ").strip()
    return re.sub(r"\s{2,}", " ", flattened)
def convert_prompt(
    prompt: str,
    retry_times: int = 5,
) -> str:
    """Expand a short user prompt into a detailed image description.

    The request goes through the OpenAI client's chat-completions endpoint
    (model ``glm-4-plus``) with a system instruction and four worked examples
    (two English, two Chinese) ahead of the user's own input.

    Args:
        prompt: The user's raw prompt.
        retry_times: Maximum number of API attempts.

    Returns:
        The cleaned, enhanced description from the first attempt that yields
        non-empty text. When ``OPENAI_API_KEY`` is unset or empty the input is
        returned untouched. When every attempt fails or comes back empty, the
        whitespace-cleaned original prompt is returned, so the result is
        always a usable string.
    """
    if not os.environ.get("OPENAI_API_KEY"):
        return prompt

    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)
    client = OpenAI()
    prompt = clean_string(prompt)

    # The conversation is identical for every attempt, so build it once.
    messages = [
        {
            "role": "system",
            "content": "You are a bilingual image description assistant that works with an image generation bot. You work with an assistant bot that will draw anything you say . \n For example ,For example, outputting \"a beautiful morning in the woods with the sun peaking through the trees\" or \"阳光透过树林的美丽清晨\" will trigger your partner bot to output an image of a forest morning, as described . \n You will be prompted by people looking to create detailed , amazing images . The way to accomplish this is to take their short prompts and make them extremely detailed and descriptive . \n There are a few rules to follow : \n - Input can be in Chinese or English. If input is in English, prompt should be written in English. If input is in Chinese, prompt should be written in Chinese.\n - You will only ever output a single image description per user request .\n - Image descriptions must be detailed and specific, including keyword categories such as subject, medium, style, additional details, color, and lighting. \n - When generating descriptions, focus on portraying the visual elements rather than delving into abstract psychological and emotional aspects. Provide clear and concise details that vividly depict the scene and its composition, capturing the tangible elements that make up the setting.\n - Do not provide the process and explanation, just return the modified description . \n "
        },
        {
            "role": "user",
            "content": "Create an imaginative image descriptive caption for the user input : \"An anime girl stands amidst a dense flower bush.\""
        },
        {
            "role": "assistant",
            "content": "This image is a beautifully crafted digital illustration in an anime style. It features Suzuran, a character from the video game Arknights. She stands gracefully amidst a picturesque meadow with lush green grass and scattered wildflowers that gently sway in the breeze. Her attire includes a detailed outfit with layered ruffles and intricate fastenings, reflecting both elegance and functionality typical of the game's aesthetic. Suzuran holds a small bouquet of flowers delicately in her hands, adding to the serene atmosphere.\n\nProminent fox ears extend from her head, signifying her animal spirit form or persona within the game. The background showcases rolling hills covered in dense foliage under a brilliant blue sky dotted with fluffy white clouds. Golden leaves float whimsically through the air, enhancing the magical quality of the scene. A distant building partially obscured by vegetation hints at human presence without detracting from the natural beauty surrounding Suzuran. Overall, this artwork captures a moment of tranquility and charm, embodying the essence of the character and the enchanting world she inhabits."
        },
        {
            "role": "user",
            "content": "Create an imaginative image descriptive caption for the user input : \"Draw a bright convertible car with a sense of artistic design.\""
        },
        {
            "role": "assistant",
            "content": "The image showcases a meticulously crafted Delahaye 135 M.S. Special Roadster from 1937 in a highly polished and realistic rendering style that highlights its luxurious design and impeccable details. The car's body is an elegant deep brown with a glossy finish, exuding sophistication and timeless beauty. Its aerodynamic, streamlined shape features smooth curves accentuated by chrome detailing on the fenders and running boards. The front grille has three prominent circular headlights, adding to its classic charm while the dual exhaust pipes are tastefully integrated into the rear fenders.\n\nThe open cockpit area reveals sumptuous tan leather seats, emphasizing both comfort and elegance for the driver and passenger. The vehicle’s number plate reads \"MB 175\". Photographed against a dark gradient background, the focus remains solely on this automotive masterpiece, allowing viewers to appreciate every intricate detail without distraction. This image captures not just a car but a piece of art, reflecting the luxury and craftsmanship emblematic of pre-war European automobiles."
        },
        {
            "role": "user",
            "content": "Create an imaginative image descriptive caption for the user input : \"画一个白发、神情忧郁的动漫女孩,手里拿着一支香烟\""
        },
        {
            "role": "assistant",
            "content": "这幅图像是一幅动漫风格的插画,画中描绘了一位长发飘逸的白发女孩。她神情忧郁,双眼低垂,脸上带着微微的愁容。女孩穿着浅色外套,里面搭配深色衬衫和领带,增添了她沉静却时尚的外表。她的手靠近嘴边,似乎在拿着一支香烟,画作的说明也表示她在吸烟。背景描绘了一个冬季的城市场景,地面和建筑物上覆盖着积雪。几株稀疏的植物从雪地中探出,街边停着一辆车,似乎是空荡荡的街道,增强了场景的荒凉氛围。这幅作品整体风格细致精美,典型的现代动漫插画风格,既捕捉了角色的情感表达,也呈现了周围环境的静谧氛围"
        },
        {
            "role": "user",
            "content": "Create an imaginative image descriptive caption for the user input : \"一张红色的海报,中间写有“开门大吉”\""
        },
        {
            "role": "assistant",
            "content": "这张图片采用了海报风格,色彩鲜艳,主要以红色和金色为主,寓意吉祥如意。在画面的正中间是一块红色的大匾,上面用白色大字写着“开门大吉”,四角点缀着金色的装饰图案,显得格外喜庆。匾额上方悬挂着一对红色的灯笼,增添了节日的气氛。背景左右两侧是传统的绿色中式建筑,屋顶呈现出典型的飞檐翘角设计。底部有祥云朵朵和可爱的卡通福袋,象征着好运福气满满。整张海报传达出浓厚的节日氛围。"
        },
        {
            "role": "user",
            "content": f"Create an imaginative image descriptive caption for the user input : {prompt}"
        },
    ]

    for attempt in range(retry_times):
        try:
            response = client.chat.completions.create(
                messages=messages,
                model="glm-4-plus",
                temperature=0.01,
                top_p=0.7,
                stream=False,
                max_tokens=300,
            )
            enhanced = response.choices[0].message.content
        except Exception:
            # Best-effort enhancement: record the failure and try again rather
            # than hiding it or aborting the caller.
            logger.warning(
                "Prompt enhancement attempt %d/%d failed",
                attempt + 1,
                retry_times,
                exc_info=True,
            )
            continue

        # Keep the reply in its own variable so an empty/None answer can never
        # replace the user's prompt for later attempts or for the return value.
        cleaned = clean_string(enhanced) if enhanced else ""
        if cleaned:
            return cleaned

    return prompt
def _remove_stale_files(directory, cutoff):
    """Delete regular files in *directory* last modified before *cutoff*."""
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            if not os.path.isfile(file_path):
                continue
            file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
            if file_mtime < cutoff:
                os.remove(file_path)
        except OSError as err:
            # A file can disappear or be locked between listing and removal;
            # skip it instead of letting the error end the cleanup thread.
            print(f"delete_old_files: skipping {file_path}: {err}")


def delete_old_files():
    """Periodically purge stale files from ``./gradio_tmp``; never returns.

    Every 600 seconds the directory is (re)created if missing and each regular
    file whose modification time is more than 5 minutes old is removed.
    Filesystem errors are reported and skipped so one bad entry cannot stop
    the loop.
    """
    while True:
        cutoff = datetime.now() - timedelta(minutes=5)
        directories = ["./gradio_tmp"]
        for directory in directories:
            try:
                os.makedirs(directory, exist_ok=True)
                _remove_stale_files(directory, cutoff)
            except OSError as err:
                print(f"delete_old_files: cannot clean {directory}: {err}")
        time.sleep(600)


# Daemon thread: the cleanup loop must not keep the process alive on shutdown.
threading.Thread(target=delete_old_files, daemon=True).start()
@spaces.GPU(duration=180)  # ZeroGPU decorator
def infer(prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, num_images,
          progress=gr.Progress(track_tqdm=True)):
    """Generate images for *prompt* with the module-level CogView4 pipeline.

    When *randomize_seed* is truthy the supplied *seed* is replaced by a random
    integer in [0, 65536]. Returns ``(images, seed)`` where *images* is the
    pipeline's list of generated images and *seed* is the seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, 65536)

    rng = torch.Generator().manual_seed(seed)
    output = pipe(
        prompt=prompt,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images,  # generate num_images images
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=rng,
    )
    # The pipeline output carries the generated images as a list.
    return output.images, seed
# Example prompts offered as one-click buttons in the UI.
examples = [
    (
        "A single distinctively-featured subject with exaggerated body proportions, "
        "Dali-inspired facial features including a long mustache and elongated features, "
        "a woman in the background, abundant melting objects, "
        "and other Dali-inspired distortions in the scene, "
        "expressing a balance of melancholy and subtle hints of happiness "
        "in a mysterious setting with a muted color palette and a deep, atmospheric environment, "
        "created using oil painting techniques, applying rule of thirds, leading lines, "
        "and a clear focal point, inspired by Dali's 'birth and death' motif"
    ),
    (
        "A single distinctively-featured subject with extremely pronounced and unique features "
        "in a monochromatic, Guernica-inspired picasso's Les Demoiselles d'Avignon style, "
        "standing out against the rain in a gaudin-inspired rainy street scene, "
        "with a subtle Guernica horse silhouette in the background, "
        "surrounded by less prominent origami-style cubist bodies"
    ),
]
# NOTE(review): the reviewed copy of this file had lost its indentation, so the
# container nesting below (which components sit in which Row/Column) is a
# reconstruction — confirm it against the deployed layout.

# Upper bound on width * height that the dimension sliders allow.
MAX_PIXELS = 2 ** 21
# Largest value either dimension slider may take; equals the sliders' declared maximum.
MAX_SIDE = 2048


def update_max_height(width):
    """Return a slider update capping height so width * height <= MAX_PIXELS.

    The cap never exceeds MAX_SIDE, so a small width cannot push the height
    slider past its declared 2048 limit.
    """
    max_height = min(MAX_SIDE, MAX_PIXELS // width)
    return gr.update(maximum=max_height)


def update_max_width(height):
    """Return a slider update capping width so width * height <= MAX_PIXELS.

    The cap never exceeds MAX_SIDE, so a small height cannot push the width
    slider past its declared 2048 limit.
    """
    max_width = min(MAX_SIDE, MAX_PIXELS // height)
    return gr.update(maximum=max_width)


def update_gallery_layout(num_images):
    """Return the gallery grid update for the selected number of images."""
    if num_images == 1:
        return gr.update(columns=1, rows=1)
    if num_images == 2:
        return gr.update(columns=2, rows=1)
    # Four images, and any unexpected value, use the 2x2 grid.
    return gr.update(columns=2, rows=2)


with gr.Blocks() as demo:
    # Page header: title, project links, duplicate-space badge and usage notice.
    gr.HTML("""
    <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
    CogView4-6B Hugging Face Space🤗
    </div>
    <div style="text-align: center;">
    <a href="https://huggingface.co/THUDM/CogView4-6B">🤗 Model Hub</a> |
    <a href="https://github.com/THUDM/CogView4">🌐 Github</a> |
    <a href="https://arxiv.org/abs/2403.05121">📜 arxiv </a>
    </div>
    <div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
    <span>If the Space is too busy, duplicate it to use privately</span>
    <a href="https://huggingface.co/spaces/THUDM-HF-SPACE/CogView4?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
    margin-left: .75em;
    "></a>
    </div>
    <div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
    ⚠️ This demo is for academic research and experiential use only.
    </div>
    """)

    with gr.Column():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=15,
                placeholder="Enter your prompt",
                container=False,
            )
        with gr.Row():
            # Prompt enhancement via convert_prompt is currently disabled:
            # enhance = gr.Button("Enhance Prompt (Strongly Suggest)", scale=1)
            # enhance.click(
            #     convert_prompt,
            #     inputs=[prompt],
            #     outputs=[prompt]
            # )
            run_button = gr.Button("Run", scale=1)
            num_images = gr.Radio(
                choices=[1, 2, 4],
                label="Number of Images",
                value=1
            )
        result = gr.Gallery(label="Results", show_label=True, columns=2, rows=2)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=65536,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_SIDE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_SIDE,
                    step=32,
                    value=1024,
                )
                # Moving one dimension re-caps the other slider's maximum.
                width.change(update_max_height, inputs=[width], outputs=[height])
                height.change(update_max_width, inputs=[height], outputs=[width])
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=3.5,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=10,
                    maximum=100,
                    step=1,
                    value=50,
                )

        with gr.Column():
            gr.Markdown("### Examples (Enhance prompt with pain++)")
            for i, ex in enumerate(examples):
                with gr.Row():
                    ex_btn = gr.Button(
                        value=ex,
                        variant="secondary",
                        elem_id=f"ex_btn_{i}",
                        scale=3
                    )
                    ex_img = gr.Image(
                        value=f"img_{i + 1}.jpg",
                        label="Effect",
                        interactive=False,
                        height=130,
                        width=130,
                        scale=1
                    )
                    # Bind the current example as a default argument so each
                    # button fills the prompt box with its own text.
                    ex_btn.click(fn=lambda ex=ex: ex, inputs=[], outputs=prompt)

    # Resize the gallery grid whenever the requested image count changes.
    num_images.change(update_gallery_layout, inputs=[num_images], outputs=[result])

    # Clicking "Run" or submitting the prompt box both start generation.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, num_images],
        outputs=[result, seed]
    )

demo.queue().launch()
|