import gradio as gr
import torch
import json
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from peft import PeftModel
import warnings
warnings.filterwarnings("ignore")

# Hugging Face Hub identifiers: the base vision-language checkpoint and the
# LoRA adapter that is applied on top of it.
BASE_MODEL = "Qwen/Qwen2-VL-7B-Instruct"
ADAPTER = "soupstick/qwen2vl-amazon-ft-lora"

# Module-level slots filled on demand by load_model(); None until then.
model = processor = None

def load_model():
    """Load the base model, LoRA adapter and processor on CPU, once.

    The loaded objects are stored in the module-level ``model`` and
    ``processor`` globals. Subsequent calls return immediately when both
    are already populated.

    Returns:
        bool: True when ``model`` and ``processor`` are ready to use,
        False when any loading step raised (the error is printed).
    """
    global model, processor

    # Only short-circuit when BOTH pieces are available; a half-initialised
    # state must trigger a fresh load attempt.
    if model is not None and processor is not None:
        return True

    print("⏳ Loading model (CPU mode)...")
    try:
        # Force CPU usage and optimize for memory
        base_model = Qwen2VLForConditionalGeneration.from_pretrained(
            BASE_MODEL,
            device_map="cpu",
            torch_dtype=torch.float32,  # Use float32 for CPU
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            use_cache=True,
        )

        # Wrap the base model with the fine-tuned LoRA adapter
        print("⏳ Loading LoRA adapter...")
        adapted_model = PeftModel.from_pretrained(base_model, ADAPTER)

        # The processor comes from the base checkpoint
        loaded_processor = AutoProcessor.from_pretrained(
            BASE_MODEL,
            trust_remote_code=True,
        )
    except Exception as e:
        # Globals are untouched here, so the next call retries from scratch
        # instead of reporting success with a missing adapter/processor.
        print(f"❌ Error loading model: {e}")
        return False

    # Publish only after every step succeeded, so callers never observe a
    # model without its adapter or without a processor.
    model, processor = adapted_model, loaded_processor
    print("✅ Model loaded successfully!")
    return True

_DEFAULT_PROMPT = "Generate Amazon listing."


def generate_listing(image, prompt=_DEFAULT_PROMPT):
    """Generate an Amazon-style listing for a product image.

    Args:
        image: A PIL image of the product, or None when nothing was uploaded.
        prompt: Instruction text sent to the model alongside the image.
            An empty, whitespace-only or None value falls back to the
            default instruction.

    Returns:
        str: The decoded model output, or a user-facing warning/error
        message when no image was given, the model could not be loaded,
        or generation raised.
    """
    if image is None:
        return "⚠️ Please upload an image."

    # Load model if not already loaded
    if not load_model():
        return "❌ Error: Could not load model. Please try again."

    # The instruction is optional in the UI, so a cleared textbox must not
    # send an empty prompt to the model. Non-blank prompts pass through
    # unchanged.
    instruction = prompt if prompt and prompt.strip() else _DEFAULT_PROMPT

    try:
        # convert() returns a new image, so the caller's object is never
        # mutated by the in-place thumbnail() below; it also normalises
        # palette/alpha inputs to RGB before resampling.
        working_image = image.convert("RGB")

        # Cap the longest side at 512 px to reduce memory usage.
        # thumbnail() only ever shrinks, so smaller images are left as-is.
        working_image.thumbnail((512, 512), Image.Resampling.LANCZOS)

        # Prepare chat messages
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": working_image},
                {"type": "text", "text": instruction},
            ],
        }]

        # Apply chat template
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Process inputs
        inputs = processor(
            text=text,
            images=working_image,
            return_tensors="pt",
        )

        # Generate with conservative settings for CPU
        print("⏳ Generating listing...")
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,  # Reduced for CPU
                do_sample=True,
                temperature=0.7,
                top_p=0.8,
                pad_token_id=processor.tokenizer.eos_token_id,
            )

        # Drop the prompt tokens from each sequence so only the newly
        # generated continuation is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        return processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising,
        # and echo it to the server log like load_model() does.
        message = f"❌ Error generating listing: {str(e)}"
        print(message)
        return message

def format_example_output():
    """Return a sample listing, pretty-printed as JSON, for display in the UI.

    Returns:
        str: A 2-space-indented JSON document with the keys ``title``,
        ``bullet_points``, ``description``, ``keywords`` and ``category``.
    """
    example = {
        "title": "Premium Wireless Bluetooth Headphones with Noise Cancellation",
        "bullet_points": [
            "• Advanced noise cancellation technology for immersive audio experience",
            "• 30-hour battery life with quick charge feature",
            "• Premium comfort design with soft ear cushions",
            "• Universal compatibility with all Bluetooth devices",
            "• Built-in microphone for crystal clear calls"
        ],
        "description": "Experience premium audio quality with these advanced wireless headphones...",
        "keywords": "wireless headphones, bluetooth, noise cancelling, premium audio",
        "category": "Electronics > Audio > Headphones"
    }
    # ensure_ascii=False keeps the "•" bullets readable; the default would
    # render them as \u2022 escape sequences in the displayed example.
    return json.dumps(example, indent=2, ensure_ascii=False)

# Gradio Interface
def create_interface():
    """Assemble the Gradio Blocks UI for the listing generator.

    The page consists of an intro text, a row with two columns (inputs and
    the generate button in the first, the result textbox in the second),
    a collapsed accordion showing the example JSON, and a footer note.
    The button's click event is wired to ``generate_listing``.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    intro_markdown = """
        # 🛒 Qwen2-VL Amazon Listing Generator (LoRA)
        
        Upload a product image and generate an Amazon-style listing with title, bullet points, description, keywords, and category.
        
        **Model**: [soupstick/qwen2vl-amazon-ft-lora](https://huggingface.co/soupstick/qwen2vl-amazon-ft-lora) (Qwen2-VL-7B + LoRA)
        """
    footer_markdown = """
        ---
        **⚠️ Note**: This demo runs on CPU which may take 1-2 minutes per generation. 
        For faster inference, consider upgrading to GPU hardware.
        
        **🔗 Links**: [Model Card](https://huggingface.co/soupstick/qwen2vl-amazon-ft-lora) | [Base Model](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)
        """

    with gr.Blocks(theme=gr.themes.Soft(), title="Amazon Listing Generator") as app:
        gr.Markdown(intro_markdown)

        with gr.Row():
            # First column: everything the user provides.
            with gr.Column():
                picture = gr.Image(type="pil", label="📸 Upload Product Image", height=300)
                instruction = gr.Textbox(
                    label="📝 Instruction (Optional)",
                    value="Generate Amazon listing.",
                    placeholder="Enter custom instruction or use default",
                    lines=2,
                )
                run_button = gr.Button("🚀 Generate Listing", variant="primary", size="lg")

            # Second column: where the generated text is shown.
            with gr.Column():
                result_box = gr.Textbox(
                    label="📋 Generated Listing",
                    lines=15,
                    placeholder="Upload an image and click 'Generate Listing' to see results...",
                )

        # Collapsed reference showing the shape of a listing.
        with gr.Accordion("📋 Expected Output Format", open=False):
            gr.Code(format_example_output(), language="json", label="Example JSON Structure")

        # Clicking the button feeds (image, instruction) to generate_listing
        # and writes its return value into the result textbox.
        run_button.click(fn=generate_listing, inputs=[picture, instruction], outputs=result_box)

        gr.Markdown(footer_markdown)

    return app

# Script entry point: runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # Build the Blocks UI and start serving it.
    demo = create_interface()
    # NOTE(review): share=True is forwarded to Gradio's launch(); presumably
    # this requests a public share link — confirm against the Gradio docs
    # for the installed version and the hosting environment.
    demo.launch(share=True)