import gradio as gr
import numpy as np
import random
import os
import torch
from diffusers import StableDiffusionPipeline
from peft import PeftModel, LoraConfig
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id_default = "stable-diffusion-v1-5/stable-diffusion-v1-5"

if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024


def get_lora_sd_pipeline(
    ckpt_dir='./model_output', 
    base_model_name_or_path=model_id_default, 
    dtype=torch_dtype, 
    device=device, 
    adapter_name="pusheen"
):
    unet_sub_dir = os.path.join(ckpt_dir, "unet")
    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
    if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
        base_model_name_or_path = config.base_model_name_or_path

    if base_model_name_or_path is None:
        raise ValueError("Please specify the base model name or path")

    pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype).to(device)
    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)

    if os.path.exists(text_encoder_sub_dir):
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )

    if dtype in (torch.float16, torch.bfloat16):
        pipe.unet.half()
        pipe.text_encoder.half()

    pipe.to(device)
    return pipe

    
# def encode_prompt(prompt, tokenizer, text_encoder):
#     text_inputs = tokenizer(
#         prompt,
#         padding="max_length",
#         max_length=tokenizer.model_max_length,
#         return_tensors="pt",
#     )
#     with torch.no_grad():
#         if len(text_inputs.input_ids[0]) < tokenizer.model_max_length:
#             prompt_embeds = text_encoder(text_inputs.input_ids.to(text_encoder.device))[0]
#         else:
#             embeds = []
#             start = 0
#             while start < tokenizer.model_max_length:
#                 end = start + tokenizer.model_max_length
#                 part_of_text_inputs = text_inputs.input_ids[0][start:end]
#                 if len(part_of_text_inputs) < tokenizer.model_max_length:
#                     part_of_text_inputs = torch.cat([part_of_text_inputs, torch.tensor([tokenizer.pad_token_id] * (tokenizer.model_max_length - len(part_of_text_inputs)))])
#                 embeds.append(text_encoder(part_of_text_inputs.to(text_encoder.device).unsqueeze(0))[0])
#                 start += int((8/
                              
#                               11)*tokenizer.model_max_length)
#             prompt_embeds = torch.mean(torch.stack(embeds, dim=0), dim=0)
#     return prompt_embeds


# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    prompt,
    negative_prompt,
    width=512,
    height=512,
    model_id=model_id_default,
    seed=42,
    guidance_scale=7.0,
    lora_scale=1.0,
    num_inference_steps=20,
    progress=gr.Progress(track_tqdm=True),
):  
    generator = torch.Generator(device).manual_seed(seed)
    pipe = get_lora_sd_pipeline(base_model_name_or_path=model_id)    
    pipe = pipe.to(device)
    pipe.fuse_lora(lora_scale=lora_scale)
    pipe.safety_checker = None
    # prompt_embeds = encode_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
    # negative_prompt_embeds = encode_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
    
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    ).images[0]
    
    return image

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css, fill_height=True) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image demo")

        with gr.Row():
            model_id = gr.Textbox(
                label="Model ID",
                max_lines=1,
                placeholder="Enter model id",
                value=model_id_default,
            )

        prompt = gr.Textbox(
            label="Prompt",
            max_lines=1,
            placeholder="Enter your prompt",
        )
        
        negative_prompt = gr.Textbox(
            label="Negative prompt",
            max_lines=1,
            placeholder="Enter your negative prompt",
        )
        
        with gr.Row():
            seed = gr.Number(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
            
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=0.0,
                maximum=10.0,
                step=0.1,
                value=7.0,  # Replace with defaults that work for your model
            )
        with gr.Row():
            lora_scale = gr.Slider(
                label="LoRA scale",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=1.0,
            )

            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                step=1,
                value=20,  # Replace with defaults that work for your model
            )

        with gr.Accordion("Optional Settings", open=False):
            
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,  # Replace with defaults that work for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,  # Replace with defaults that work for your model
                )
        
        run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Result", show_label=False)
            
    gr.on(
        triggers=[run_button.click],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            width,
            height,
            model_id,
            seed,
            guidance_scale,
            num_inference_steps,      
            lora_scale
            
        ],
        outputs=[result],
    )

if __name__ == "__main__":
    demo.launch()