# app.py - SmallLM Gradio Demo
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings

warnings.filterwarnings("ignore")

# Global variables for model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the SmallLM model and tokenizer."""
    global model, tokenizer
    try:
        print("Loading SmallLM model...")
        model_name = "XsoraS/SmallLM"

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Add a padding token if the tokenizer doesn't define one
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model (half precision on GPU, full precision on CPU)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True,
        )

        print("Model loaded successfully!")
        return "Model loaded successfully!"
    except Exception as e:
        error_msg = f"Error loading model: {str(e)}"
        print(error_msg)
        return error_msg


def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_p=0.9):
    """Generate text using the loaded model."""
    global model, tokenizer

    if model is None or tokenizer is None:
        return "Please load the model first!"

    try:
        # Tokenize input; passing the attention mask avoids ambiguity when
        # pad_token_id equals eos_token_id
        encoded = tokenizer(prompt, return_tensors="pt")
        input_ids = encoded["input_ids"].to(model.device)
        attention_mask = encoded["attention_mask"].to(model.device)

        # Generate; max_new_tokens bounds only the completion, so a long
        # prompt can't consume the generation budget the way max_length would
        with torch.no_grad():
            outputs = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=int(max_new_tokens),
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                num_return_sequences=1,
            )

        # Decode only the newly generated tokens; slicing by token count is
        # more reliable than stripping the prompt string, since decoding can
        # normalize whitespace and special characters
        new_tokens = outputs[0][input_ids.shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        return f"Error generating text: {str(e)}"


def clear_text():
    """Clear the input and output."""
    return "", ""


# Create Gradio interface
with gr.Blocks(title="SmallLM Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 SmallLM Inference Demo")
    gr.Markdown("Simple demo for XsoraS/SmallLM text generation")

    with gr.Row():
        with gr.Column(scale=1):
            load_btn = gr.Button("🔄 Load Model", variant="primary")
            status = gr.Textbox(
                label="Status",
                value="Click 'Load Model' to start",
                interactive=False,
            )

    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="Enter your prompt:",
                placeholder="Once upon a time...",
                lines=3,
            )

            with gr.Row():
                max_new_tokens = gr.Slider(
                    label="Max New Tokens",
                    minimum=10,
                    maximum=500,
                    value=100,
                    step=10,
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                )
                top_p = gr.Slider(
                    label="Top P",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                )

            with gr.Row():
                generate_btn = gr.Button("✨ Generate", variant="primary")
                clear_btn = gr.Button("🗑️ Clear")

        with gr.Column(scale=2):
            output = gr.Textbox(
                label="Generated Text:",
                lines=10,
                interactive=False,
            )

    # Event handlers
    load_btn.click(
        fn=load_model,
        outputs=status,
    )

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, max_new_tokens, temperature, top_p],
        outputs=output,
    )

    clear_btn.click(
        fn=clear_text,
        outputs=[prompt_input, output],
    )

    # Examples
    gr.Examples(
        examples=[
            ["The future of artificial intelligence is"],
            ["In a world where technology and nature coexist"],
            ["Write a short story about a robot who"],
            ["Explain quantum computing in simple terms:"],
        ],
        inputs=prompt_input,
    )

if __name__ == "__main__":
    demo.launch()
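
# Optional deployment tweak (a sketch, not part of the original flow): preload
# the model at startup so users don't have to click "Load Model", and enable
# Gradio's request queue so concurrent generations don't block one another.
# demo.queue() and launch() are standard gr.Blocks APIs; eager loading and the
# share setting are assumptions about your deployment, so this stays commented out.
#
#     if __name__ == "__main__":
#         load_model()            # eager load instead of waiting for the button
#         demo.queue()            # queue concurrent generation requests
#         demo.launch(share=False)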