Qwen/Qwen-Image-Edit · Running locally on a 24 GB VRAM GPU: sharing a Gradio script and asking for suggestions.

Hi guys !
There is no need to go into much detail; once you read through the script, everything should be clear.
Total usage: 16 GB VRAM + 70 GB RAM (32 GB DDR4 + 50 GB NVMe RAM disk); my GPU is an RTX 3090.
Timings: loading all model files takes about 145 s; one image edit or generation takes about 95 s.
Can any further adjustments or improvements be made? Suggestions are welcome.

CODE:

import os
import time
import torch
import datetime
import numpy as np
import gradio as gr
from PIL import Image
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
from transformers import Qwen2_5_VLForConditionalGeneration
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from diffusers import QwenImageEditPipeline, QwenImageTransformer2DModel

# ========================================================
# MODEL LOADING FUNCTIONS
# ========================================================

def load_model():
    """Build the Qwen-Image-Edit pipeline with 4-bit quantized components.

    Steps, in order:
      1. Load the image transformer with an NF4 4-bit bitsandbytes config.
      2. Load the Qwen2.5-VL text encoder with an NF4 4-bit config.
      3. Assemble ``QwenImageEditPipeline`` around those two components.
      4. Try to load the 8-step Lightning LoRA (best effort).
      5. Enable model CPU offload on the pipeline.

    Returns:
        QwenImageEditPipeline: the assembled pipeline, ready to be called.

    Raises:
        Exception: anything raised while loading the transformer, the text
            encoder or the pipeline propagates to the caller. Only a LoRA
            loading failure is caught and reported as a warning.
    """
    start_time = time.time()
    print("🔄 Model yükleniyor... Bu biraz zaman alabilir, ortalama 2 dakika 30 saniye.")

    model_id = "Qwen/Qwen-Image-Edit"
    torch_dtype = torch.bfloat16

    # 1. Image transformer (4-bit)
    print("1/6 - Görsel transformer yükleniyor...")
    quantization_config_diffusers = DiffusersBitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        # This module is listed as one to skip, i.e. it is left unquantized.
        llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
    )
    transformer = QwenImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        quantization_config=quantization_config_diffusers,
        torch_dtype=torch_dtype,
    )
    transformer = transformer.to("cpu")

    # 2. Text encoder (4-bit)
    print("2/6 - Metin encoder yükleniyor...")
    quantization_config_transformers = TransformersBitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        subfolder="text_encoder",
        quantization_config=quantization_config_transformers,
        torch_dtype=torch_dtype,
    )
    text_encoder = text_encoder.to("cpu")

    # 3. Pipeline: reuse the two quantized components loaded above.
    print("3/6 - Pipeline oluşturuluyor...")
    pipe = QwenImageEditPipeline.from_pretrained(
        model_id,
        transformer=transformer,
        text_encoder=text_encoder,
        torch_dtype=torch_dtype,
    )

    # 4. LoRA weights. Deliberately best effort: a failure is reported and
    #    loading continues with the base weights only.
    print("4/6 - LoRA ağırlıkları yükleniyor...")
    try:
        pipe.load_lora_weights(
            "lightx2v/Qwen-Image-Lightning",
            weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
        )
    except Exception as e:
        print(f"⚠️ LoRA yüklenemedi: {e}")

    # 5. CPU offload
    print("5/6 - CPU offload etkinleştiriliyor...")
    pipe.enable_model_cpu_offload()

    # 6. Ready for inference. No torch.Generator is created here: the
    #    previous one was never passed to the pipeline nor returned.
    print("6/6 - Inference yükleniyor...")
    print("="*60)

    elapsed = time.time() - start_time
    # The earlier message opened a parenthesis that was never closed.
    print(f"✅ Model başarıyla yüklendi ve hazır! Bu işlemler {elapsed:.2f} saniye sürdü.")

    return pipe

# ========================================================
# GLOBAL PIPELINE (loaded once at application startup)
# ========================================================

# Build the pipeline once at module import; edit_image() reads this
# module-level `pipe` on every request instead of reloading the model.
try:
    pipe = load_model()
except Exception as e:
    # Print a short, readable failure line, then re-raise so the original
    # exception and traceback still stop the script.
    print(f"❌ Model yüklenemedi: {e}")
    raise

# ========================================================
# IMAGE SIZE CHECK AND RESCALING FUNCTION
# ========================================================

def resize_image_if_needed(image, max_pixels=1048576):
    """Downscale ``image`` proportionally when it exceeds a pixel budget.

    Images whose ``width * height`` is at most ``max_pixels`` are returned
    unchanged (the very same object). Larger images are scaled by
    ``sqrt(max_pixels / pixels)``; each side is then rounded down to a
    multiple of 4 and raised to at least 256. Because that floor is applied
    per side, very elongated images may not keep their exact aspect ratio
    and may end up above ``max_pixels``.

    Args:
        image: PIL Image object.
        max_pixels: Maximum number of pixels (default 1024x1024 = 1048576).

    Returns:
        PIL Image: the original image, or a LANCZOS-resampled copy.
    """
    width, height = image.size
    current_pixels = width * height

    print(f"🔍 Resim boyutu kontrol ediliyor: {width}x{height} = {current_pixels:,} pixel")

    if current_pixels > max_pixels:
        # Square root because the factor is applied to both dimensions.
        scale_factor = (max_pixels / current_pixels) ** 0.5

        # Per side: truncate, snap down to a multiple of 4 (described by the
        # original author as a model requirement), then apply the 256 floor.
        new_width, new_height = (
            max(side - side % 4, 256)
            for side in (int(width * scale_factor), int(height * scale_factor))
        )

        print(f"📏 Resim ölçeklendiriliyor: {width}x{height} -> {new_width}x{new_height}")
        print(f"📊 Ölçeklendirme oranı: {scale_factor:.3f}")

        # LANCZOS resampling is used for the downscale.
        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        new_pixels = new_width * new_height
        print(f"✅ Ölçeklendirme tamamlandı: {new_pixels:,} pixel")
        return resized_image

    print("✅ Resim boyutu uygun, ölçeklendirme gerekli değil.")
    return image


# ========================================================
# GRADIO IMAGE EDITING FUNCTION
# ========================================================

def edit_image(input_image, prompt):
    """Apply a text-guided edit to an image and save the result to disk.

    Args:
        input_image: Image from the Gradio input component (configured with
            ``type="pil"``), or None when nothing was uploaded.
        prompt: Edit instruction text. None, empty and whitespace-only
            values are rejected.

    Returns:
        tuple: ``(image, status_message)``. ``image`` is the edited image on
        success and None on a validation failure or when an error occurred.
        Errors are reported through ``status_message`` rather than raised.
    """
    # Matches the 8-step Lightning LoRA that load_model() asks for.
    steps = 8

    if input_image is None:
        return None, "Lütfen bir görüntü yükleyin."
    # `not prompt` also covers None, which would otherwise crash on .strip().
    if not prompt or not prompt.strip():
        return None, "Lütfen bir açıklama (prompt) girin."

    try:
        # Check the image size and downscale it if needed.
        processed_image = resize_image_if_needed(input_image)

        # Announce the run before it starts; this line used to be printed
        # only after the pipeline had already finished.
        print(f"🔄 Resim düzenlemeleri {steps} Adımda uygulanacak...")

        start_time = time.time()
        result = pipe(
            image=processed_image,
            prompt=prompt,
            num_inference_steps=int(steps)
        ).images[0]
        elapsed = time.time() - start_time

        print(f"✅ Resim Başarıyla düzenlendi! ({elapsed:.2f} saniye)")

        # Save automatically and echo the outcome to the console log.
        save_status = save_image_locally(result)
        print(save_status)

        return result, f"✅ Başarıyla düzenlendi! ({elapsed:.2f} saniye)\n{save_status}"

    except Exception as e:
        # UI boundary: keep the app alive and show the message to the user,
        # but keep the full traceback in the console for debugging.
        import traceback
        traceback.print_exc()
        return None, f"❌ Hata oluştu: {str(e)}"
    
def save_image_locally(image_input):
    """Write an image to ``outputs/`` as a timestamped PNG and report the outcome.

    Args:
        image_input: A PIL image or a numpy array. None is rejected.

    Returns:
        str: A status message: the absolute path of the saved file on
        success, or an error description. Exceptions raised while
        converting or saving are caught and returned as text.
    """
    if image_input is None:
        return "❌ Kaydedilecek görsel yok!"

    try:
        # Normalise the input to a PIL image.
        if isinstance(image_input, Image.Image):
            image_pil = image_input
        elif isinstance(image_input, np.ndarray):
            # Clamp to 0-255 and cast to uint8 before handing it to PIL.
            pixel_data = np.clip(image_input, 0, 255).astype(np.uint8)
            image_pil = Image.fromarray(pixel_data)
        else:
            return "❌ Desteklenmeyen görsel formatı."

        # Convert any non-RGB mode to RGB before saving.
        rgb_image = image_pil if image_pil.mode == "RGB" else image_pil.convert("RGB")

        # Target path: outputs/qwie_<timestamp>.png (second resolution).
        output_dir = "outputs"
        os.makedirs(output_dir, exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_path = os.path.join(output_dir, f"qwie_{timestamp}.png")

        rgb_image.save(output_path)
        abs_path = os.path.abspath(output_path)
        return f"✅ Kaydedildi: {abs_path}"
    except Exception as e:
        return f"❌ Hata: {str(e)}"
    
# ========================================================
# GRADIO INTERFACE
# ========================================================

# Declare the Gradio UI: a header, an input column and an output column,
# a click handler wired to edit_image(), and a footer with model links.
with gr.Blocks(title="🎨 Qwen-Image Edit - Yerel Uygulama") as demo:
    # Page header shown above the controls.
    gr.Markdown("""
    # 🎨 Qwen-Image Edit ile Görsel Düzenleme
    Görsellerinizi metinle düzenleyin! (Örn: 'Kazak kırmızı çizgili olsun')
    """)

    with gr.Row():
        # Input column: source image, prompt text and the run button.
        with gr.Column():
            # type="pil" makes the handler receive a PIL image.
            input_image = gr.Image(type="pil", label="Girdi Görseli", elem_id="input_img")
            prompt = gr.Textbox(label="Prompt (Açıklama)", placeholder="Kazak kırmızı çizgili olsun", value="change the color of clothes to pink")
            
            btn = gr.Button("🎨 Düzenlemeyi Uygula", variant="primary")

        # Output column: edited image and a status line.
        with gr.Column():
            output_image = gr.Image(label="Çıktı Görseli", elem_id="output_img")
            status = gr.Textbox(label="Durum", value="Hazır")

    # edit_image returns (image, status message), matching the two outputs.
    btn.click(
        fn=edit_image,
        inputs=[input_image, prompt],
        outputs=[output_image, status]
    )

    # Footer: links to the model and LoRA pages plus a timing note.
    gr.Markdown("""
    <br>
    <small>
    Model: <a href="https://huggingface.co/Qwen/Qwen-Image-Edit" target="_blank">Qwen/Qwen-Image-Edit</a> |
    LoRA: <a href="https://huggingface.co/lightx2v/Qwen-Image-Lightning" target="_blank">lightx2v/Qwen-Image-Lightning</a><br>
    Not: İlk çalışma biraz uzun sürer (model yüklenir). Sonraki çalıştırmalar hızlı olur. <br> Ortalama bir resmin düzenlenip oluşturulması 1 dakika 30 saniye civarı sürer.
    </small>
    """)

# ========================================================
# START THE APPLICATION
# ========================================================

# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(
        server_name="127.0.0.1",
        server_port=7860,
        share=False,  # Set to True to get a publicly reachable share link
        inbrowser=True  # Opens the browser automatically
    )

SAMPLE LOGS :::

============================================================.
🔄 Model yükleniyor... Bu biraz zaman alabilir, ortalama 2 dakika 30 saniye.
------------------------------------------------------------------------------------------------.
1/6 - Görsel transformer yükleniyor...
Fetching 9 files: 100%|██████████████████████████████████████████████████████| 9/9 [00:00<?, ?it/s]
Loading checkpoint shards: 100%|█████████████████████████████████████| 9/9 [00:56<00:00,  6.23s/it]
2/6 - Metin encoder yükleniyor...
Loading checkpoint shards: 100%|█████████████████████████████████████| 4/4 [00:58<00:00, 14.53s/it]
3/6 - Pipeline oluşturuluyor...
Loading pipeline components...: 100%|████████████████████████████████| 6/6 [00:01<00:00,  3.78it/s]
4/6 - LoRA ağırlıkları yükleniyor...
5/6 - CPU offload etkinleştiriliyor...
6/6 - Inference yükleniyor...
============================================================.
✅ Model başarıyla yüklendi ve hazır! Bu işlemler (144.80 saniye sürdü.
============================================================.

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.

============================================================.
🔍 Resim boyutu kontrol ediliyor: 1088x1440 = 1,566,720 pixel
📏 Resim ölçeklendiriliyor: 1088x1440 -> 888x1176
📊 Ölçeklendirme oranı: 0.818
✅ Ölçeklendirme tamamlandı: 1,044,288 pixel
100%|████████████████████████████████████████████████████████████████| 8/8 [01:04<00:00,  8.10s/it]
🔄 Resim düzenlemeleri 8 Adımda uygulanacak...
✅ Resim Başarıyla düzenlendi! (96.51 saniye)
✅ Kaydedildi: E:\ai\aya\q-img-edt\outputs\qwie_2025-08-26_21-18-30.png
🔍 Resim boyutu kontrol ediliyor: 2880x3840 = 11,059,200 pixel
📏 Resim ölçeklendiriliyor: 2880x3840 -> 884x1180
📊 Ölçeklendirme oranı: 0.308
✅ Ölçeklendirme tamamlandı: 1,043,120 pixel
100%|████████████████████████████████████████████████████████████████| 8/8 [01:04<00:00,  8.08s/it]
🔄 Resim düzenlemeleri 8 Adımda uygulanacak...
✅ Resim Başarıyla düzenlendi! (100.50 saniye)
✅ Kaydedildi: E:\ai\aya\q-img-edt\outputs\qwie_2025-08-26_21-22-43.png
🔍 Resim boyutu kontrol ediliyor: 819x1024 = 838,656 pixel
✅ Resim boyutu uygun, ölçeklendirme gerekli değil.
100%|████████████████████████████████████████████████████████████████| 8/8 [01:04<00:00,  8.08s/it]
🔄 Resim düzenlemeleri 8 Adımda uygulanacak...
✅ Resim Başarıyla düzenlendi! (94.06 saniye)
✅ Kaydedildi: E:\ai\aya\q-img-edt\outputs\qwie_2025-08-26_21-27-59.png