import gradio as gr
import cv2
import torch
import numpy as np
from diffusers import StableDiffusionPipeline
from transformers import AutoProcessor, AutoModel, AutoTokenizer
from PIL import Image

from diffusers import AutoPipelineForImage2Image

# Pick the compute device once and reuse it everywhere below, so the script
# also starts on machines without a GPU instead of failing in `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on the GPU; on CPU fall back to float32.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Hub id of the checkpoint to load.
# NOTE(review): confirm this id resolves to a *model* repository that ships
# diffusers weights; `from_pretrained` fails otherwise.
model_id = "radames/Real-Time-Latent-Consistency-Model"

# NOTE(review): `tokenizer` is not referenced anywhere else in the visible
# code (the pipeline loads its own); kept because it is a module-level name.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# `process_frame` passes `image=` and `strength=`, which only an
# image-to-image pipeline consumes. The plain text-to-image
# `StableDiffusionPipeline` ignores them, so the webcam frame would never
# influence the result. The auto class picks the image-to-image variant
# that matches the checkpoint.
realtime_pipe = AutoPipelineForImage2Image.from_pretrained(model_id, torch_dtype=torch_dtype)
realtime_pipe.to(device)


def process_frame(
    frame: np.ndarray,
    prompt: str = "A futuristic landscape",
    *,
    strength: float = 0.5,
    guidance_scale: float = 7.5,
    size: tuple = (512, 512),
) -> np.ndarray:
    """Run one video frame through the module-level `realtime_pipe`.

    Args:
        frame: Image array in OpenCV's BGR channel order; it is converted to
            RGB before being handed to the pipeline.
        prompt: Text prompt forwarded to the pipeline.
        strength: Forwarded to the pipeline as `strength`.
        guidance_scale: Forwarded to the pipeline as `guidance_scale`.
        size: `(width, height)` the frame is resized to before inference.

    Returns:
        The first image produced by the pipeline, as a NumPy array.

    Note:
        `image` and `strength` only have an effect when `realtime_pipe` is an
        image-to-image pipeline; a text-to-image pipeline ignores them.
    """
    # OpenCV delivers BGR, PIL and the pipeline expect RGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(rgb_frame).resize(size)

    output = realtime_pipe(
        prompt=prompt,
        image=image,
        strength=strength,
        guidance_scale=guidance_scale,
    )
    return np.array(output.images[0])

def video_stream(prompt):
    """Yield processed frames read from capture device 0.

    Frames are read with OpenCV, passed through `process_frame` together with
    `prompt`, and yielded one at a time. Iteration ends when the device is no
    longer open or a read fails.

    Args:
        prompt: Text prompt forwarded to `process_frame` for every frame.

    Yields:
        The value returned by `process_frame` for each captured frame.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            yield process_frame(frame, prompt)
    finally:
        # Runs on normal exhaustion, when the consumer closes the generator
        # early, and when `process_frame` raises, so the camera handle is
        # never left open.
        cap.release()


# Create Gradio App
def _stream_to_outputs(prompt):
    """Yield every frame from `video_stream` once per wired output component.

    The click handler below declares two outputs, so each step must provide
    two values; `video_stream` yields a single frame per step.
    """
    for frame in video_stream(prompt):
        yield frame, frame


with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")

    with gr.Row():
        # Webcam capture is a mode of the Image component (there is no
        # `gr.Camera` component). NOTE(review): `sources=` is the Gradio 4+
        # spelling; Gradio 3 uses `source="webcam"`.
        # This component is not wired to any event here: the frames shown in
        # the outputs come from the server-side capture in `video_stream`.
        webcam_feed = gr.Image(sources=["webcam"], streaming=True, label="Live Webcam")
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")

    with gr.Row():
        canvas_output = gr.Image(interactive=True, label="Canvas - Processed Image Output")

    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value="A futuristic landscape")
    start_button = gr.Button("Start Real-Time AI Enhancement")

    # The generator handler streams one (processed_image, canvas_output)
    # pair per captured frame.
    start_button.click(
        fn=_stream_to_outputs,
        inputs=[prompt_input],
        outputs=[processed_image, canvas_output],
    )

demo.launch(share=True)