import torch
from diffusers import DiffusionPipeline
import os

# Hugging Face token (required for gated/private checkpoints; FLUX.1-schnell itself is openly available)
hf_token = os.environ["HF_TOKEN"]

# Model and inference parameters
model_id = "black-forest-labs/FLUX.1-schnell"
prompt = "A cat holding a sign that says hello world"
image_width = 768    # FLUX expects dimensions divisible by 16
image_height = 1360
num_inference_steps = 4  # FLUX.1-schnell is timestep-distilled and works well with very few steps

# Hardware-specific settings for CPU-only inference
device = "cpu"
torch_dtype = torch.float32  # Use float32 on CPU

# Load the pipeline
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    token=hf_token,
)
pipe = pipe.to(device)

# Enable attention slicing to reduce peak memory on CPU, especially at larger resolutions
pipe.enable_attention_slicing()

# Run inference
# NOTE: FLUX.1-schnell ignores guidance_scale (unlike FLUX.1-dev), so it is not set here
image = pipe(
    prompt=prompt,
    width=image_width,
    height=image_height,
    num_inference_steps=num_inference_steps,
).images[0]

# Save or display the image (example)
# image.save("optimized_flux_output.png")
# print("Image generated and saved.")
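
# --- Optional: reproducible generation (a sketch, not part of the original script) ---
# Passing a seeded torch.Generator makes repeated runs produce the same image for the
# same prompt and settings. The seed value (0) and the output filename are arbitrary
# choices for illustration.
generator = torch.Generator(device="cpu").manual_seed(0)
seeded_image = pipe(
    prompt=prompt,
    width=image_width,
    height=image_height,
    num_inference_steps=num_inference_steps,
    generator=generator,
).images[0]
seeded_image.save("flux_schnell_cpu_seed0.png")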