# gg235d-1 / src/app.py
# Last commit: "Update src/app.py" by ssboost (05a72e6, verified)
import gradio as gr
import pillow_heif
import spaces
import torch
from PIL import Image, ImageEnhance, ImageFilter # ImageFilter 추가
from refiners.fluxion.utils import manual_seed, no_grad
from utils import load_ic_light, resize_modulo_8
import zipfile
from io import BytesIO
import tempfile
import cv2 # OpenCV 추가
import numpy as np # NumPy 추가
# Register the HEIF and AVIF image openers provided by pillow_heif.
pillow_heif.register_heif_opener()
pillow_heif.register_avif_opener()
# GPU configuration: use CUDA when it is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): float16 is selected when CUDA reports bf16 support, float32
# otherwise — confirm whether torch.bfloat16 was the intended dtype here.
DTYPE = torch.float16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float32
ic_light = load_ic_light(device=DEVICE, dtype=DTYPE)
# Move the model (and its solver) to the selected device and dtype.
ic_light.to(device=DEVICE, dtype=DTYPE)
ic_light.device = DEVICE
ic_light.dtype = DTYPE
ic_light.solver = ic_light.solver.to(device=DEVICE, dtype=DTYPE)
# Translation-related libraries and configuration
import os
from openai import OpenAI
import uuid
import time
import logging
import random
from datetime import datetime, timedelta
# Logging configuration: INFO level, "<timestamp> - <message>" format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(message)s'
)
logger = logging.getLogger(__name__)
# API key, read from the OPENAI_API_KEY environment variable (None if unset).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# API client shared by all translation requests.
openai_client = OpenAI(api_key=OPENAI_API_KEY)
def validate_input(text: str) -> bool:
    """Return True when *text* is non-blank and contains Hangul.

    A character counts as Hangul when it lies in U+3131..U+318F or in
    U+AC00..U+D7A3. Whitespace-only and empty strings are rejected.
    """
    if text.strip() == "":
        return False
    for ch in text:
        if "\u3131" <= ch <= "\u318F":
            return True
        if "\uAC00" <= ch <= "\uD7A3":
            return True
    return False
def translate_to_english(input_text: str):
    """Translate Korean text to English with the ``gpt-4o-mini`` chat model.

    Sampling parameters (temperature, top_p) are drawn from a private RNG
    seeded with the current time in milliseconds; the same value is sent as
    the API ``seed`` and embedded in the prompt.

    Args:
        input_text: Text to translate; must pass ``validate_input``.

    Returns:
        The stripped translation on success. On failure a Korean error
        message is returned instead of raising: it starts with
        "유효하지 않은 입력" for rejected input and with "번역 중 오류" for any
        error raised while calling the API. Callers detect failure by
        checking these prefixes, so they must not change.
    """
    logger.info("GPT-4-mini 번역 시작")
    if not validate_input(input_text):
        logger.info("유효하지 않은 입력입니다.")
        return "유효하지 않은 입력입니다. 한글을 포함한 텍스트를 입력해주세요."
    try:
        current_time = int(time.time() * 1000)
        # A private generator yields the same values as seeding the module
        # RNG would, without resetting the process-wide random state.
        rng = random.Random(current_time)
        temperature = rng.uniform(0.4, 0.85)
        top_p = rng.uniform(0.9, 0.98)
        request_id = str(uuid.uuid4())[:8]
        timestamp_micro = int(time.time() * 1000000) % 1000
        system_msg = f"REQ-{request_id}-{timestamp_micro}"
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": system_msg
                },
                {
                    "role": "user",
                    "content": f"""
1. Translate the input Korean text into English with these rules:
2. Output ONLY the English translation.
3. Create a coherent, brief scene using provided words/phrases.
4. Stay strictly within the scope of input words.
5. Maintain flow and natural transitions.
6. Keep scene descriptions concise but complete.
7. Do not add external elements.
8. Focus on core scene elements.
9. Preserve original meaning and intent.
10. No explanations or meta-commentary.
11. No formatting beyond basic text.
12. Just the direct English translation.
Additional guidance:
13. Use input words harmoniously.
14. Create clear mental imagery.
15. Keep scene contained and focused.
16. Ensure logical connections.
17. Maintain narrative flow.
Input: {input_text} [Seed: {current_time}]
"""
                }
            ],
            max_tokens=100,
            temperature=temperature,
            top_p=top_p,
            seed=current_time
        )
        content = response.choices[0].message.content
        if content is None:
            # Surface a missing completion explicitly instead of failing on
            # ``None.strip()``; handled by the except clause below.
            raise ValueError("empty completion returned by the model")
        translated = content.strip()
        logger.info("GPT-4-mini 번역 완료")
        return translated
    except Exception as e:
        # Top-level boundary: the UI expects an error string, never a raise.
        logger.error("번역 중 오류 발생: %s", e)
        return f"번역 중 오류가 발생했습니다: {str(e)}"
@spaces.GPU(duration=120)
@no_grad()
def generate_images(
    image: Image.Image,
    prompt: str,
) -> tuple[Image.Image, Image.Image, Image.Image, Image.Image]:
    """Generate a background for an RGBA foreground image with IC-Light.

    The image is passed through ``resize_modulo_8(image, 768)``; its alpha
    channel is used as the foreground mask. Two denoising passes are run:
    the first starts from random noise, the second re-noises the result and
    denoises it again at strength 0.5. A fresh random seed is used per call.

    Args:
        image: Foreground image in RGBA mode.
        prompt: English text prompt describing the desired scene.

    Returns:
        ``(image_rgb, bg_image, mask, result)``: the resized RGB input, the
        generated image, the alpha mask, and the input composited over the
        generated image through the mask.

    Raises:
        ValueError: If *image* is not in RGBA mode.
    """
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if image.mode != "RGBA":
        raise ValueError("입력 이미지는 RGBA 모드여야 합니다.")

    seed = torch.seed()
    manual_seed(seed)

    negative_prompt = "dirty, messy, worst quality, low quality, watermark, signature, jpeg artifacts, deformed, monochrome, black and white"
    condition_scale = 1.25
    num_inference_steps = 25
    # No lighting-preference image is supplied, so the first pass always
    # starts from pure noise at full strength.
    strength_first_pass = 1.0
    strength_second_pass = 0.5

    image = resize_modulo_8(image, 768)
    mask = image.getchannel("A")
    image_rgb = image.convert("RGB")

    clip_text_embedding = ic_light.compute_clip_text_embedding(text=prompt, negative_text=negative_prompt)
    ic_light.set_ic_light_condition(image=image_rgb, mask=mask)

    x = torch.randn_like(ic_light._ic_light_condition)  # pyright: ignore[reportPrivateUsage]

    # First pass.
    num_steps = int(round(num_inference_steps / strength_first_pass))
    first_step = int(num_steps * (1 - strength_first_pass))
    ic_light.set_inference_steps(num_steps, first_step)
    for step in ic_light.steps:
        x = ic_light(
            x,
            step=step,
            clip_text_embedding=clip_text_embedding,
            condition_scale=condition_scale,
        )

    # Second pass: add noise at the new first step, then denoise again.
    num_steps = int(round(num_inference_steps / strength_second_pass))
    first_step = int(num_steps * (1 - strength_second_pass))
    ic_light.set_inference_steps(num_steps, first_step)
    x = ic_light.solver.add_noise(x, noise=torch.randn_like(x), step=first_step)
    for step in ic_light.steps:
        x = ic_light(
            x,
            step=step,
            clip_text_embedding=clip_text_embedding,
            condition_scale=condition_scale,
        )

    bg_image = ic_light.lda.latents_to_image(x)
    result = Image.composite(image_rgb, bg_image, mask)
    return image_rgb, bg_image, mask, result
@no_grad()
def adjust_final_image(
    bg_image: Image.Image,
    mask: Image.Image,
    bg_brightness: float,
    bg_contrast: float,
    bg_saturation: float,
    bg_temperature: float,
    bg_vibrance: float,
    bg_color_mixer_blues: float,
    bg_shadows: float,
) -> Image.Image:
    """Apply the colour adjustments selected in the UI to *bg_image*.

    Adjustments are applied in this order: brightness, contrast, saturation,
    temperature, vibrance, blue-channel offset, shadows. Each optional step
    is skipped when its parameter is exactly 0.0.

    Args:
        bg_image: Image to adjust (split into R, G, B for the blue offset).
        mask: Accepted for interface compatibility; not used here.
        bg_brightness: Factor for ``ImageEnhance.Brightness``.
        bg_contrast: Factor for ``ImageEnhance.Contrast``.
        bg_saturation: Factor for ``ImageEnhance.Color``.
        bg_temperature: Scaled by 30, then added to red and subtracted from
            blue (positive shifts towards red, negative towards blue).
        bg_vibrance: Percentage; applied as a colour factor ``1 + v / 100``.
        bg_color_mixer_blues: Offset added to the blue channel, clamped to
            0..255.
        bg_shadows: Percentage; brightness factor ``1 + s / 100`` applied to
            pixels darker than 128 through a blurred mask.

    Returns:
        The adjusted image.
    """
    print("Adjusting Final Image with the following parameters:")
    print(f"Background - Brightness: {bg_brightness}, Contrast: {bg_contrast}, "
          f"Saturation: {bg_saturation}, Temperature: {bg_temperature}, "
          f"Vibrance: {bg_vibrance}, Blues: {bg_color_mixer_blues}, Shadows: {bg_shadows}")

    bg_image = ImageEnhance.Brightness(bg_image).enhance(bg_brightness)
    bg_image = ImageEnhance.Contrast(bg_image).enhance(bg_contrast)
    bg_image = ImageEnhance.Color(bg_image).enhance(bg_saturation)

    if bg_temperature != 0.0:
        cv_image = cv2.cvtColor(np.array(bg_image), cv2.COLOR_RGB2BGR).astype(np.float32)
        shift = float(bg_temperature) * 30
        # In BGR order channel 2 is red and channel 0 is blue. The same
        # signed shift handles both warming (shift > 0) and cooling.
        cv_image[:, :, 2] = np.clip(cv_image[:, :, 2] + shift, 0, 255)
        cv_image[:, :, 0] = np.clip(cv_image[:, :, 0] - shift, 0, 255)
        bg_image = Image.fromarray(cv2.cvtColor(cv_image.astype(np.uint8), cv2.COLOR_BGR2RGB))

    if bg_vibrance != 0.0:
        bg_image = ImageEnhance.Color(bg_image).enhance(1 + (bg_vibrance / 100.0))

    if bg_color_mixer_blues != 0.0:
        r, g, b = bg_image.split()
        # Build an integer lookup table even when the slider supplies a float.
        b = b.point(lambda p: int(round(min(255, max(0, p + bg_color_mixer_blues)))))
        bg_image = Image.merge("RGB", (r, g, b))

    if bg_shadows != 0.0:
        grayscale = bg_image.convert("L")
        # White where the pixel is dark (< 128), black elsewhere, then
        # feathered so the adjustment blends in smoothly.
        mask_dark = grayscale.point(lambda p: 255 if p < 128 else 0)
        mask_dark = mask_dark.filter(ImageFilter.GaussianBlur(radius=10))
        # One formula darkens (negative) or lifts (positive) the shadows.
        dark_adjusted = ImageEnhance.Brightness(bg_image).enhance(1 + (bg_shadows / 100.0))
        bg_image = Image.composite(dark_adjusted, bg_image, mask_dark)

    return bg_image
def get_korean_timestamp():
    """Return the current Korea Standard Time (UTC+9) as ``YYYYMMDD_HHMMSS``."""
    # Local import: the file-level datetime import does not bring in
    # ``timezone``.
    from datetime import timezone

    # Fixed +09:00 offset on an aware clock, replacing the deprecated naive
    # ``datetime.utcnow()``.
    korea_tz = timezone(timedelta(hours=9))
    return datetime.now(tz=korea_tz).strftime('%Y%m%d_%H%M%S')
def download_image(image, input_image_name):
    """Save *image* as a JPEG in a fresh temporary directory.

    Args:
        image: PIL image to save, or ``None``.
        input_image_name: Optional object with a ``name`` attribute; its
            base name without directory or extension is embedded in the
            output file name. Falls back to "이미지" when unavailable.

    Returns:
        Path of the written JPEG file, or ``None`` when *image* is ``None``.
    """
    if image is None:
        return None

    timestamp = get_korean_timestamp()

    source_name = getattr(input_image_name, "name", None) if input_image_name else None
    base_name = ""
    if source_name:
        # Drop any directory part and the extension so the result is always
        # a plain file-name component.
        base_name = os.path.splitext(os.path.basename(source_name))[0]
    if not base_name:
        base_name = "이미지"

    file_name = f"[끝장AI]배경바꾸기_{base_name}_{timestamp}.jpg"
    # A per-call directory keeps concurrent requests within the same second
    # from overwriting each other's files.
    temp_file_path = os.path.join(tempfile.mkdtemp(), file_name)

    # JPEG has no alpha channel; convert, as download_all does, instead of
    # letting the save fail on e.g. RGBA input.
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(temp_file_path, format="JPEG")
    return temp_file_path
def download_all(original_image, bg_image, final_image):
    """Bundle the original, background and final images into a ZIP of JPEGs.

    Each image is converted to RGB if needed, encoded as JPEG in memory and
    stored in the archive as ``[끝장AI]배경바꾸기_<role>_<timestamp>.jpg``.

    Args:
        original_image: The uploaded input image.
        bg_image: The generated background image.
        final_image: The filtered composite image.

    Returns:
        Path of the created ZIP file, or ``None`` when any input is ``None``.
    """
    if original_image is None or bg_image is None or final_image is None:
        print("Download function received None input.")
        return None

    print("Preparing images for download...")
    timestamp = get_korean_timestamp()
    members = (
        ("original_image", original_image),
        ("background_image", bg_image),
        ("final_image", final_image),
    )

    zip_name = f"[끝장AI]배경바꾸기_{timestamp}.zip"
    # A per-call directory keeps concurrent requests within the same second
    # from overwriting each other's archive.
    zip_path = os.path.join(tempfile.mkdtemp(), zip_name)

    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        for base_name, member_image in members:
            if member_image.mode != "RGB":
                member_image = member_image.convert("RGB")
            # Encode in memory so no intermediate JPEG files are left on disk.
            buffer = BytesIO()
            member_image.save(buffer, format="JPEG")
            zip_file.writestr(
                f"[끝장AI]배경바꾸기_{base_name}_{timestamp}.jpg",
                buffer.getvalue(),
            )

    print(f"ZIP file created at {zip_path}")
    return zip_path
def reset_sliders(initial_result):
    """Return UI updates that restore the neutral filter settings.

    The returned tuple holds, in order: the "필터없음" radio value, seven
    slider updates (brightness, contrast and saturation at 1.0; temperature,
    vibrance, blues and shadows at 0.0), and *initial_result* passed through
    unchanged.
    """
    print("Resetting sliders to default values.")
    neutral_values = (1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0)
    slider_updates = tuple(gr.update(value=neutral) for neutral in neutral_values)
    return ("필터없음", *slider_updates, initial_result)
def create_interface():
    """Build the Gradio Blocks UI for background replacement.

    The left column holds the image upload, prompt, action buttons, filter
    presets and adjustment sliders; the right column shows the generated
    image, the filtered composite and the download controls. Event handlers
    for generation, slider adjustment, preset selection, reset and download
    are wired here.

    Returns:
        The configured ``gr.Blocks`` instance (not yet queued or launched).
    """
    css = """
/* 프롬프트 텍스트박스 라벨과 입력 텍스트만 검은색으로 설정 */
.gr-textbox > label {
color: #000 !important;
}
.gr-textbox input {
color: #000 !important;
}
/* 필터 선택 라디오 버튼 라벨만 검은색으로 설정 */
.gradio-radio > label {
color: #000 !important;
}
/* 슬라이더 라벨과 현재 값 텍스트만 검은색으로 설정 */
.gr-slider > label {
color: #000 !important;
}
.gr-slider .slider-value {
color: #000 !important;
}
.gradio-radio label[aria-checked="true"] {
color: #000 !important;
background-color: #fff !important;
padding: 4px 8px;
border-radius: 4px;
border: 1px solid #000 !important;
}
.gradio-radio label[aria-checked="false"] {
color: #000 !important;
background-color: #fff !important;
border: 1px solid #000 !important;
}
.gradio-radio input[type="radio"] + label::before {
display: none;
}
.gradio-radio input[type="radio"]:checked + label::after {
content: "";
display: none;
}
.btn-primary, .btn-secondary, .gr-button {
color: #000 !important;
}
/* JPG 이미지 다운받기 버튼 배경색 검은색/글자색 흰색 강제 적용 */
.download-container .download-button.gr-button {
background-color: #000 !important;
color: #fff !important;
border: none !important;
}
.download-container .download-button.gr-button:hover {
background-color: #333 !important;
color: #fff !important;
}
"""
    # Filter presets; the order here is the order shown in the radio group.
    filters = [
        {
            "name": "필터없음",
            "bg_brightness": 1.0,
            "bg_contrast": 1.0,
            "bg_saturation": 1.0,
            "bg_temperature": 0.0,
            "bg_vibrance": 0.0,
            "bg_color_mixer_blues": 0.0,
            "bg_shadows": 0.0
        },
        {
            "name": "깨끗한",
            "bg_brightness": 1.3,
            "bg_contrast": 1.2,
            "bg_saturation": 1.0,
            "bg_temperature": -0.2,
            "bg_vibrance": -20.0,
            "bg_color_mixer_blues": 5.0,
            "bg_shadows": -10.0
        },
        {
            "name": "따스한",
            "bg_brightness": 1.3,
            "bg_contrast": 1.2,
            "bg_saturation": 0.8,
            "bg_temperature": 0.0,
            "bg_vibrance": -10.0,
            "bg_color_mixer_blues": 2.0,
            "bg_shadows": -10.0
        },
        {
            "name": "푸른빛",
            "bg_brightness": 1.3,
            "bg_contrast": 1.0,
            "bg_saturation": 1.0,
            "bg_temperature": 0.0,
            "bg_vibrance": 0.0,
            "bg_color_mixer_blues": 10.0,
            "bg_shadows": 5.0
        }
    ]
    # Preset keys in the same order as the slider components below.
    slider_keys = (
        "bg_brightness",
        "bg_contrast",
        "bg_saturation",
        "bg_temperature",
        "bg_vibrance",
        "bg_color_mixer_blues",
        "bg_shadows",
    )
    presets_by_name = {preset["name"]: preset for preset in filters}
    neutral_preset = presets_by_name["필터없음"]
    default_filter = presets_by_name["깨끗한"]

    with gr.Blocks(theme=gr.themes.Soft(
        primary_hue=gr.themes.Color(
            c50="#FFF7ED",
            c100="#FFEDD5",
            c200="#FED7AA",
            c300="#FDBA74",
            c400="#FB923C",
            c500="#F97316",
            c600="#EA580C",
            c700="#C2410C",
            c800="#9A3412",
            c900="#7C2D12",
            c950="#431407",
        ),
        secondary_hue="zinc",
        neutral_hue="zinc",
        font=("Pretendard", "sans-serif")
    ), css=css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(
                    label="이미지 추가",
                    image_mode="RGBA",
                    type="pil",
                )
                prompt = gr.Textbox(
                    label="프롬프트 (한국어)",
                    placeholder="눈덮인 히말라야, 뭉게구름, 흰색 바닥에 놓여있는 양말",
                )
                run_button = gr.Button(
                    value="이미지 생성",
                    variant="primary",
                    size="lg"
                )
                reset_button = gr.Button(
                    value="필터 초기화",
                    variant="secondary",
                    size="lg"
                )
                filter_radio = gr.Radio(
                    label="필터 선택",
                    choices=[preset["name"] for preset in filters],
                    value=default_filter["name"],
                    type="value",
                )
                bg_brightness = gr.Slider(label="밝기", minimum=0.1, maximum=5.0, value=default_filter["bg_brightness"], step=0.1)
                bg_contrast = gr.Slider(label="대비", minimum=0.1, maximum=5.0, value=default_filter["bg_contrast"], step=0.1)
                bg_saturation = gr.Slider(label="채도", minimum=0.0, maximum=5.0, value=default_filter["bg_saturation"], step=0.1)
                bg_temperature = gr.Slider(
                    label="색온도",
                    minimum=-1.0,
                    maximum=1.0,
                    value=default_filter["bg_temperature"],
                    step=0.1
                )
                bg_vibrance = gr.Slider(label="활기", minimum=-100.0, maximum=100.0, value=default_filter["bg_vibrance"], step=1.0)
                bg_color_mixer_blues = gr.Slider(label="컬러 믹서 (블루)", minimum=-100.0, maximum=100.0, value=default_filter["bg_color_mixer_blues"], step=1.0)
                bg_shadows = gr.Slider(label="그림자", minimum=-100.0, maximum=100.0, value=default_filter["bg_shadows"], step=1.0)
            with gr.Column(scale=2):
                background_output = gr.Image(
                    label="생성된 이미지",
                    type="pil",
                    interactive=False
                )
                final_output = gr.Image(
                    label="필터 적용 이미지",
                    type="pil",
                    interactive=False
                )
                with gr.Row(elem_classes="download-container"):
                    download_button = gr.Button(
                        value="JPG로 변환하기",
                        elem_classes="download-button",
                        variant="primary",
                        size="lg"
                    )
                with gr.Row(elem_classes="download-container"):
                    download_output = gr.File(label="JPG 이미지 다운받기", elem_classes="download-output")

        # Per-session values carried between events.
        mask_state = gr.State()
        image_rgb_state = gr.State()
        initial_result = gr.State()

        # Slider components in the same order as ``slider_keys``.
        bg_sliders = [
            bg_brightness,
            bg_contrast,
            bg_saturation,
            bg_temperature,
            bg_vibrance,
            bg_color_mixer_blues,
            bg_shadows,
        ]

        def apply_filter_preset(selected_filter):
            """Return slider updates for the chosen preset.

            Unknown names fall back to the neutral "필터없음" values.
            """
            preset = presets_by_name.get(selected_filter, neutral_preset)
            return tuple(gr.update(value=preset[key]) for key in slider_keys)

        filter_radio.change(
            fn=apply_filter_preset,
            inputs=[filter_radio],
            outputs=bg_sliders,
        )

        def on_generate(image, prompt, bg_brightness, bg_contrast, bg_saturation, bg_temperature, bg_vibrance, bg_color_mixer_blues, bg_shadows):
            """Translate the prompt, generate the background and apply filters.

            Returns ``(bg_img, mask, final_result, image_rgb, final_result)``
            for the outputs wired below, or five ``None`` values when no
            image was uploaded or the translation failed.
            """
            if image is None:
                # Without an upload, generate_images would fail on image.mode.
                logger.warning("No input image provided; skipping generation.")
                return None, None, None, None, None

            if not (prompt or "").strip():
                logger.info("프롬프트가 비어 있습니다. 기본 프롬프트를 사용합니다.")
                final_prompt = "high-quality professional studio photography, Realistic soft white tone bright lighting, HEIC, CR2, NEF"
            else:
                translated_prompt = translate_to_english(prompt)
                # translate_to_english reports failure through these prefixes.
                if translated_prompt.startswith("유효하지 않은 입력") or translated_prompt.startswith("번역 중 오류"):
                    return None, None, None, None, None
                final_prompt = f"{translated_prompt}, high-quality professional studio photography, Realistic soft white tone bright lighting, HEIC, CR2, NEF"

            image_rgb, bg_img, mask, result = generate_images(image, final_prompt)
            print("Image generation completed.")

            if bg_img is not None and mask is not None and image_rgb is not None:
                adjusted_bg = adjust_final_image(
                    bg_image=bg_img,
                    mask=mask,
                    bg_brightness=bg_brightness,
                    bg_contrast=bg_contrast,
                    bg_saturation=bg_saturation,
                    bg_temperature=bg_temperature,
                    bg_vibrance=bg_vibrance,
                    bg_color_mixer_blues=bg_color_mixer_blues,
                    bg_shadows=bg_shadows,
                )
                final_result = Image.composite(image_rgb, adjusted_bg, mask)
            else:
                final_result = result

            # The result reaches ``initial_result`` through the outputs list.
            # Assigning to ``initial_result.value`` would alter the component
            # object itself rather than this session's state (see Gradio
            # State documentation), so it is deliberately not done here.
            return bg_img, mask, final_result, image_rgb, final_result

        run_button.click(
            fn=on_generate,
            inputs=[input_image, prompt, *bg_sliders],
            outputs=[
                background_output,
                mask_state,
                final_output,
                image_rgb_state,
                initial_result,
            ],
        )

        def on_adjust(
            image_rgb,
            bg_image, mask,
            bg_brightness, bg_contrast, bg_saturation, bg_temperature, bg_vibrance, bg_color_mixer_blues, bg_shadows,
        ):
            """Re-apply the slider settings and recomposite the foreground.

            Returns the new composite, or ``None`` when nothing has been
            generated yet or compositing fails.
            """
            if bg_image is None or mask is None or image_rgb is None:
                print("Adjust function received None input.")
                return None

            print("Adjusting background image... Current slider values:")
            print(f"Brightness: {bg_brightness}, Contrast: {bg_contrast}, Saturation: {bg_saturation}, "
                  f"Temperature: {bg_temperature}, Vibrance: {bg_vibrance}, Blues: {bg_color_mixer_blues}, Shadows: {bg_shadows}")

            adjusted_bg = adjust_final_image(
                bg_image=bg_image,
                mask=mask,
                bg_brightness=bg_brightness,
                bg_contrast=bg_contrast,
                bg_saturation=bg_saturation,
                bg_temperature=bg_temperature,
                bg_vibrance=bg_vibrance,
                bg_color_mixer_blues=bg_color_mixer_blues,
                bg_shadows=bg_shadows,
            )

            input_image_rgb = image_rgb
            print(f"input_image_rgb size: {input_image_rgb.size}, mode: {input_image_rgb.mode}")
            print(f"adjusted_bg size: {adjusted_bg.size}, mode: {adjusted_bg.mode}")
            print(f"mask size: {mask.size}, mode: {mask.mode}")

            # Image.composite needs matching sizes.
            if input_image_rgb.size != adjusted_bg.size:
                print(f"Resizing input_image_rgb from {input_image_rgb.size} to {adjusted_bg.size}")
                input_image_rgb = input_image_rgb.resize(adjusted_bg.size)

            mask = mask.convert("L")
            try:
                final_result = Image.composite(input_image_rgb, adjusted_bg, mask)
            except ValueError as e:
                print(f"Composite error: {e}")
                return None

            # No debug JPEGs are written here: this handler runs on every
            # slider change and concurrent sessions would race on a fixed
            # file name in the working directory.
            print("Final image composite completed.")
            return final_result

        # Any slider change re-renders the filtered composite.
        for slider in bg_sliders:
            slider.change(
                fn=on_adjust,
                inputs=[
                    image_rgb_state,
                    background_output,
                    mask_state,
                    *bg_sliders,
                ],
                outputs=final_output,
            )

        reset_button.click(
            fn=reset_sliders,
            inputs=[final_output],
            outputs=[
                filter_radio,
                *bg_sliders,
                final_output,
            ],
        )

        download_button.click(
            fn=download_all,
            inputs=[
                input_image,
                background_output,
                final_output
            ],
            outputs=download_output,
        )

    return demo
# Script entry point: build the UI, enable the request queue, and serve it.
if __name__ == "__main__":
    logger.info("애플리케이션 시작")
    demo = create_interface()
    demo.queue()
    # Listen on all network interfaces (0.0.0.0).
    demo.launch(server_name='0.0.0.0')