# ugly-holiday-card-generator / utils/helper_utilities.py
# Mikiko Bazeley
# Refactored and removed controlnet
# 57eccf2
# helper_utilities.py
import os
from PIL import Image, ImageDraw, ImageFont, ImageColor
import requests
from io import BytesIO
import textwrap
import cv2
import numpy as np
from dotenv import load_dotenv
# Import configuration values from configuration.py
from utils.configuration import (
fonts, # Now this should correctly import fonts
default_guidance_scale,
default_control_mode,
default_num_inference_steps,
default_seed,
default_controlnet_conditioning_scale,
flux_model_url_template,
control_net_url,
get_headers,
get_control_net_headers,
valid_aspect_ratios
)
# ----------------------------------------------
# Environment Handling
# ----------------------------------------------
def load_env(dotenv_path):
    """
    Populate the process environment from the .env file at ``dotenv_path``.

    Variables that are already set are replaced by the values read from the
    file (``override=True``).
    """
    load_dotenv(dotenv_path=dotenv_path, override=True)
# ----------------------------------------------
# General Utilities
# ----------------------------------------------
def get_font(font_path, font_size):
    """
    Return the TrueType font stored at ``font_path`` at size ``font_size``.

    If loading the font raises ``IOError``, PIL's built-in default font is
    returned instead.
    """
    try:
        return ImageFont.truetype(font_path, font_size)
    except IOError:
        # Font could not be loaded: use the bundled fallback.
        return ImageFont.load_default()
def send_post_request(url, headers, data, files=None):
    """
    POST ``data`` to ``url`` and hand back the response when it succeeds.

    When ``files`` is truthy the request is sent as a file upload with
    ``data`` as the accompanying form fields; otherwise ``data`` is sent as
    a JSON body.

    :param url: Target URL
    :param headers: HTTP headers to send
    :param data: Form fields (with ``files``) or JSON payload (without)
    :param files: Optional files mapping passed through to ``requests.post``
    :return: The response object, only when its status code is 200
    :raises RuntimeError: If the status code is anything other than 200
    """
    # The two request shapes are mutually exclusive: upload vs. JSON body.
    body_kwargs = {"files": files, "data": data} if files else {"json": data}
    response = requests.post(url, headers=headers, **body_kwargs)

    if response.status_code != 200:
        raise RuntimeError(f"Request failed with status code: {response.status_code}, Response: {response.text}")
    return response
def resize_image(image, size):
    """
    Return the result of ``image.resize(size)``.

    :param image: Object exposing a ``resize`` method (a PIL image in this module)
    :param size: Target size, passed to ``resize`` unchanged
    """
    resized = image.resize(size)
    return resized
def combine_images(image1, image2):
    """
    Place ``image2`` directly to the right of ``image1`` on a new RGB canvas.

    The canvas is as wide as both images together and as tall as the taller
    of the two; both images are pasted at the top edge.

    :return: The new combined image
    """
    left_width = image1.width
    canvas_size = (left_width + image2.width, max(image1.height, image2.height))
    canvas = Image.new("RGB", canvas_size)

    for picture, x_offset in ((image1, 0), (image2, left_width)):
        canvas.paste(picture, (x_offset, 0))
    return canvas
# ----------------------------------------------
# FLUX API and ControlNet Functions
# ----------------------------------------------
def generate_flux_image(model_path, api_key, prompt, steps=default_num_inference_steps,
                        aspect_ratio="16:9", guidance_scale=default_guidance_scale,
                        seed=default_seed, deployment=None):
    """
    Request an image from the FLUX endpoint and return it as a PIL image.

    :param model_path: Model path substituted into the FLUX URL template
    :param api_key: API key used to build the request headers
    :param prompt: Text prompt describing the image
    :param steps: Number of inference steps sent to the model
    :param aspect_ratio: Aspect ratio string sent with the request
    :param guidance_scale: Guidance scale sent with the request
    :param seed: Seed value sent with the request
    :param deployment: Optional deployment name, appended as a query parameter
    :return: The image decoded from the response body
    :raises RuntimeError: If the endpoint answers with a status other than 200
    """
    endpoint = flux_model_url_template.format(model_path=model_path)
    # A deployment, when given, travels as a query parameter on the endpoint.
    url = f"{endpoint}?deployment={deployment}" if deployment else endpoint

    request_headers = get_headers(api_key)
    payload = {
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "guidance_scale": guidance_scale,
        "num_inference_steps": steps,
        "seed": seed,
    }

    response = requests.post(url, headers=request_headers, json=payload)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to generate image: {response.status_code}, {response.text}")

    # The response body is the encoded image itself.
    return Image.open(BytesIO(response.content))
def call_control_net_api(control_image, prompt, api_key,
                         control_mode=0,
                         guidance_scale=default_guidance_scale,
                         num_inference_steps=default_num_inference_steps,
                         seed=default_seed,
                         controlnet_conditioning_scale=default_controlnet_conditioning_scale):
    """
    Calls the ControlNet API with an edge-detected control image and a prompt.

    The control image is run through ``process_image``, encoded as JPEG and
    uploaded together with the generation parameters.

    :param control_image: PIL image used as the control input
    :param prompt: Text prompt to generate the image
    :param api_key: API key used to build the request headers
    :param control_mode: Control mode value sent to the API
    :param guidance_scale: Guidance scale sent to the API
    :param num_inference_steps: Number of inference steps sent to the API
    :param seed: Seed value sent to the API
    :param controlnet_conditioning_scale: Conditioning scale sent to the API
    :return: Tuple ``(generated_image, processed_image)`` of PIL images
    :raises RuntimeError: If the API responds with a status other than 200
    """
    # process_image returns a single PIL image, so the JPEG payload for the
    # upload has to be produced here rather than unpacked from its result.
    processed_image = process_image(control_image)
    jpeg_buffer = BytesIO()
    processed_image.save(jpeg_buffer, format="JPEG")
    processed_image_bytes = jpeg_buffer.getvalue()
    files = {'control_image': ('control_image.jpg', processed_image_bytes, 'image/jpeg')}

    # Aspect ratio is taken straight from the control image's pixel size.
    # NOTE(review): this sends e.g. "1024:768" unreduced — confirm the API
    # accepts that, or map it through the valid aspect-ratio helpers.
    width, height = control_image.size
    aspect_ratio = f"{width}:{height}"

    data = {
        'prompt': prompt,
        'control_mode': control_mode,
        'aspect_ratio': aspect_ratio,
        'guidance_scale': guidance_scale,
        'num_inference_steps': num_inference_steps,
        'seed': seed,
        'controlnet_conditioning_scale': controlnet_conditioning_scale
    }
    url = control_net_url
    headers = get_control_net_headers(api_key)

    # send_post_request raises RuntimeError on any non-200 response.
    response = send_post_request(url, headers, data, files)

    # The response body is the encoded generated image.
    generated_image = Image.open(BytesIO(response.content))
    return generated_image, processed_image
# ----------------------------------------------
# Image Manipulation Utilities
# ----------------------------------------------
def overlay_text_on_image(image, text, font_path, font_size, position):
    """
    Write ``text`` in black onto ``image`` at ``position`` and return the image.

    The text is drawn on ``image`` itself (no copy is made); the font is
    resolved through ``get_font``.
    """
    text_font = get_font(font_path, font_size)
    ImageDraw.Draw(image).text(position, text, font=text_font, fill="black")
    return image
def get_closest_aspect_ratio(width, height):
    """
    Look up the entry of ``valid_aspect_ratios`` nearest to ``width / height``.

    Each key of ``valid_aspect_ratios`` is read as a pair whose first element
    divided by its second gives the ratio; the value stored under the
    nearest key is returned.
    """
    target = width / height

    def distance(candidate):
        # Absolute gap between a candidate key's ratio and the image's ratio.
        return abs((candidate[0] / candidate[1]) - target)

    nearest_key = min(valid_aspect_ratios, key=distance)
    return valid_aspect_ratios[nearest_key]
def get_next_largest_aspect_ratio(width, height):
    """
    Pick the smallest valid aspect ratio that is at least ``width / height``.

    The result is a key of ``valid_aspect_ratios``, i.e. a tuple read as
    (width, height). When no valid ratio is large enough, the key nearest to
    the requested ratio is returned instead.
    """
    target = width / height

    def as_ratio(dims):
        return dims[0] / dims[1]

    at_least_target = [dims for dims in valid_aspect_ratios if as_ratio(dims) >= target]
    if at_least_target:
        # Smallest of the ratios that are not below the target.
        return min(at_least_target, key=as_ratio)

    # Nothing is large enough: fall back to the nearest ratio overall.
    return min(valid_aspect_ratios, key=lambda dims: abs(as_ratio(dims) - target))
def process_image(image):
    """
    Build an edge map of ``image``.

    The image is converted to grayscale (mode ``'L'``), passed through
    OpenCV's Canny detector with thresholds 100 and 200, and the
    single-channel result is expanded to three channels.

    :return: The edge map as a PIL image
    """
    grayscale_pixels = np.array(image.convert('L'))
    edge_map = cv2.Canny(grayscale_pixels, 100, 200)
    return Image.fromarray(cv2.cvtColor(edge_map, cv2.COLOR_GRAY2RGB))
def draw_crop_preview(image, x, y, width, height):
    """
    Outline a prospective crop region on ``image`` in red and return the image.

    The rectangle runs from (x, y) to (x + width, y + height) with a 2-pixel
    outline and is drawn on ``image`` itself.
    """
    right = x + width
    bottom = y + height
    ImageDraw.Draw(image).rectangle([x, y, right, bottom], outline="red", width=2)
    return image
def wrap_text(text, max_chars):
    """
    Re-flow ``text`` so that no line is longer than ``max_chars`` characters.

    Existing newlines are kept: each line of the input is wrapped on its own
    and the results are joined back together with newlines.
    """
    wrapper = textwrap.TextWrapper(width=max_chars)
    return "\n".join(map(wrapper.fill, text.split("\n")))
# ----------------------------------------------
# Text and Image Combination Utilities
# ----------------------------------------------
def add_custom_message(image, message, font_path, font_size, position_vertical, position_horizontal, max_chars, bg_color, font_color, alpha):
    """
    Adds a custom message to the image on top of a semi-transparent background box.

    The message is wrapped with ``wrap_text``, drawn on a transparent overlay
    and composited onto the image.

    :param image: PIL image to annotate (converted to RGBA if necessary)
    :param message: Text to draw; existing newlines are preserved
    :param font_path: Path of the font file; the default font is used if it cannot be loaded
    :param font_size: Font size passed to the font loader
    :param position_vertical: "Top" or "Center"; any other value places the text at the bottom
    :param position_horizontal: "Left" or "Center"; any other value places the text at the right
    :param max_chars: Maximum number of characters per wrapped line
    :param bg_color: Colour of the box behind the text, as understood by ``ImageColor.getrgb``
    :param font_color: Fill colour for the text
    :param alpha: Alpha value applied to the background box
    :return: A new RGB image with the message composited on top
    """
    # Reuse the shared loader so the fallback-to-default behaviour lives in one place.
    font = get_font(font_path, font_size)

    # Alpha compositing requires both layers to be RGBA.
    if image.mode != "RGBA":
        image = image.convert("RGBA")

    # Fully transparent layer that receives the box and the text.
    overlay = Image.new("RGBA", image.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    text_lines = wrap_text(message, max_chars).split("\n")
    img_width, img_height = image.size

    # Line height is approximated by the bottom edge of a capital "A".
    line_height = draw.textbbox((0, 0), "A", font=font)[3]
    total_text_height = line_height * len(text_lines)
    text_width = max(draw.textbbox((0, 0), line, font=font)[2] for line in text_lines)

    # Horizontal positioning (10 px margin from the edges)
    if position_horizontal == "Left":
        x_pos = 10
    elif position_horizontal == "Center":
        x_pos = (img_width - text_width) // 2
    else:  # "Right"
        x_pos = img_width - text_width - 10

    # Vertical positioning (10 px margin from the edges)
    if position_vertical == "Top":
        y_pos = 10
    elif position_vertical == "Center":
        y_pos = (img_height - total_text_height) // 2
    else:  # "Bottom"
        y_pos = img_height - total_text_height - 10

    # Background box behind the text. getrgb may return (r, g, b, a) for
    # colours that already carry alpha; keep only RGB so the fill stays a
    # 4-tuple once the requested alpha is appended.
    padding = 10
    bg_color_rgba = (*ImageColor.getrgb(bg_color)[:3], alpha)
    draw.rectangle(
        [x_pos - padding, y_pos - padding, x_pos + text_width + padding, y_pos + total_text_height + padding],
        fill=bg_color_rgba,
    )

    # Draw the text line by line
    for i, line in enumerate(text_lines):
        draw.text((x_pos, y_pos + i * line_height), line, font=font, fill=font_color)

    # Composite the overlay with the original image
    combined = Image.alpha_composite(image, overlay)
    return combined.convert("RGB")  # Convert back to RGB for saving/display