I ran your code on a 32 GB V100, but it still runs out of memory (OOM):

import torch
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image
from dfloat11 import DFloat11Model

# Reproduction script: build a FLUX Kontext pipeline, hand its transformer to
# DFloat11, switch on several memory-saving options, then edit one image.

# Load the pipeline from the given path with bfloat16 weights.
pipe = FluxKontextPipeline.from_pretrained("/data2/zyz/fluxkontext",
torch_dtype=torch.bfloat16)
# The return value is discarded; the call is given the pipeline's transformer
# via `bfloat16_model` — presumably it modifies that module in place
# (TODO confirm against the DFloat11 documentation).
DFloat11Model.from_pretrained(
"/data2/zyz/df11_flux",
device="cpu",
bfloat16_model=pipe.transformer,
)
# NOTE(review): five different enable_* options are turned on one after
# another below, including both model-level and sequential CPU offload.
# Verify that these are meant to be combined.
pipe.enable_model_cpu_offload()

pipe.enable_sequential_cpu_offload()

pipe.enable_attention_slicing()

pipe.enable_vae_slicing()

pipe.enable_vae_tiling()

# Input image is read from the current working directory.
input_image = load_image("./cat.png")

# Run the edit; the prompt is Chinese for "Add a hat to the cat".
# Only the first image of the returned result is kept.
image = pipe(
image=input_image,
prompt="给猫加一顶帽子",
guidance_scale=2.5,
).images[0]

image.save("df11_kontext.jpg")

The `pipe.enable_*` functions conflict with each other when used together.

Use this script instead:

import torch
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image
from dfloat11 import DFloat11Model

# Locations of the base pipeline, the DFloat11 weights, and the input image.
base_model_id = "black-forest-labs/FLUX.1-Kontext-dev"
df11_model_id = "DFloat11/FLUX.1-Kontext-dev-DF11"
source_image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"

# Build the pipeline in bfloat16, then pass its transformer to DFloat11.
# The DFloat11 call's return value is intentionally unused.
pipe = FluxKontextPipeline.from_pretrained(base_model_id, torch_dtype=torch.bfloat16)
DFloat11Model.from_pretrained(df11_model_id, device="cpu", bfloat16_model=pipe.transformer)

# Model-level CPU offload is the only memory option enabled in this script.
pipe.enable_model_cpu_offload()

input_image = load_image(source_image_url)

# Run the edit and keep the first image of the result.
result = pipe(image=input_image, prompt="Add a hat to the cat", guidance_scale=2.5)
image = result.images[0]

image.save("kontext.png")