"""Gradio demo: restyle webcam frames with a Stable Diffusion img2img pipeline."""

from typing import Iterator, Tuple

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
from PIL import Image
from transformers import AutoModel, AutoProcessor, AutoTokenizer

DEFAULT_PROMPT = "A futuristic landscape"
FRAME_SIZE = (512, 512)

# Pick the device once and use it everywhere; half precision is only used on
# CUDA because float16 inference is not generally supported on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# NOTE(review): this id looks like a Hugging Face Space rather than a model
# repository -- confirm it resolves to diffusers-format weights.
model_id = "radames/Real-Time-Latent-Consistency-Model"

# The image-to-image pipeline is required here: the text-to-image pipeline has
# no `image`/`strength` inputs, so the webcam frame would never be used.
# The pipeline loads its own tokenizer, so no separate tokenizer is created.
realtime_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id, torch_dtype=torch_dtype
)
realtime_pipe.to(device)


def process_frame(frame: np.ndarray, prompt: str = DEFAULT_PROMPT) -> np.ndarray:
    """Run one webcam frame through the img2img pipeline.

    Args:
        frame: Image array in OpenCV's BGR channel order (as returned by
            ``cv2.VideoCapture.read``).
        prompt: Text prompt that guides the generation.

    Returns:
        The generated image as an RGB array.
    """
    # OpenCV delivers BGR; PIL and the pipeline expect RGB.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(rgb).resize(FRAME_SIZE)

    result = realtime_pipe(
        prompt=prompt,
        image=image,
        strength=0.5,
        guidance_scale=7.5,
    ).images[0]
    return np.array(result)


def video_stream(prompt: str) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """Capture frames from the default camera and yield processed results.

    The camera is opened with OpenCV on the machine running this script
    (device index 0), not in the visitor's browser.

    Args:
        prompt: Text prompt forwarded to ``process_frame`` for every frame.

    Yields:
        A pair holding the same processed frame twice, one value for each of
        the two output components wired to the start button.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            processed = process_frame(frame, prompt)
            # Two outputs are registered on the click handler, so two values
            # must be produced per update.
            yield processed, processed
    finally:
        # Runs even when the generator is closed early or inference raises,
        # so the camera handle is never leaked.
        cap.release()


# Create Gradio App
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")

    with gr.Row():
        # Gradio has no `Camera` component; a streaming webcam input is an
        # Image component restricted to the "webcam" source (Gradio 4+ API).
        webcam_feed = gr.Image(
            sources=["webcam"], streaming=True, label="Live Webcam"
        )
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")

    with gr.Row():
        canvas_output = gr.Image(
            interactive=True, label="Canvas - Processed Image Output"
        )

    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value=DEFAULT_PROMPT)
    start_button = gr.Button("Start Real-Time AI Enhancement")

    start_button.click(
        fn=video_stream,
        inputs=[prompt_input],
        outputs=[processed_image, canvas_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)