import os
import subprocess

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from underthesea import word_tokenize


def run_shell_command(command):
    """Run a shell command and return its stdout, raising on any failure."""
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True
        )
        output, error = process.communicate()
        if process.returncode != 0:
            raise Exception(f"Error running command: {command}\n{error.decode('utf-8')}")
        return output.decode('utf-8')
    except Exception as e:
        raise Exception(f"Failed to execute command: {command}\n{str(e)}")


# Cache loaded models so the weights are read from disk only once per process,
# not on every button click
_model_cache = {}


def load_model_and_tokenizer(model_path):
    if model_path in _model_cache:
        return _model_cache[model_path]
    try:
        # Load the trained tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

        # Move the model to the GPU if available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        _model_cache[model_path] = (tokenizer, model, device)
        return tokenizer, model, device
    except Exception as e:
        raise Exception(f"Failed to load model or tokenizer from {model_path}: {str(e)}")


def generate_text(tokenizer, model, device, prompt, max_length=100,
                  num_return_sequences=1, top_p=0.95, temperature=0.7, seed=123):
    # Set the random seed for reproducibility
    torch.manual_seed(seed)
    if device.type == "cuda":
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # For multi-GPU setups

    # Word-segment the Vietnamese prompt (underthesea joins compound words
    # with "_"), matching the preprocessing used during fine-tuning
    prompt = word_tokenize(prompt, format='text')
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # Sample with top-k and nucleus (top-p) filtering
    output = model.generate(
        input_ids,
        max_length=int(max_length),
        num_return_sequences=int(num_return_sequences),
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=top_p,
        temperature=temperature,
        do_sample=True
    )

    # Decode, then undo the "_" word segmentation and detached punctuation
    generated_text = [
        tokenizer.decode(ids, skip_special_tokens=True)
        .replace("_", " ")
        .replace(" ,", ",")
        .replace(" .", ".")
        for ids in output
    ]
    return "\n\n".join(generated_text)  # Join multiple sequences with blank lines


def gradio_generate_text(prompt, max_length, top_p, temperature, seed, num_return_sequences):
    try:
        model_path = "models/vi-medical-mt5-finetune-qa"
        tokenizer, model, device = load_model_and_tokenizer(model_path)
        return generate_text(tokenizer, model, device, prompt, max_length,
                             num_return_sequences, top_p, temperature, seed)
    except Exception as e:
        return f"Error: {str(e)}"


# Ensure the models directory exists and clone the model weights if needed
os.makedirs('models', exist_ok=True)
if not os.path.exists('models/vi-medical-mt5-finetune-qa'):
    try:
        run_shell_command('git lfs install')
        run_shell_command(
            'git clone https://huggingface.co/danhtran2mind/vi-medical-mt5-finetune-qa'
            ' models/vi-medical-mt5-finetune-qa'
        )
    except Exception as e:
        print(f"Failed to clone model: {str(e)}")

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Vietnamese Medical mT5 Fine-Tune Question and Answer")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                lines=3,
                label="Input Prompt",
                placeholder="Enter your prompt, e.g., 'vaccine covid-19 là gì?'"
            )
            max_length = gr.Slider(minimum=10, maximum=768, value=32, label="Max Length", step=1)
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-p Sampling", step=0.01)
            temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature", step=0.01)
            seed = gr.Slider(minimum=0, maximum=10000, value=123, label="Seed", step=1)
            num_return_sequences = gr.Slider(minimum=1, maximum=5, value=1, label="Number of Sequences", step=1)
            submit_button = gr.Button("Generate")
        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)

    # Wire the button to the generation handler
    submit_button.click(
        fn=gradio_generate_text,
        inputs=[prompt, max_length, top_p, temperature, seed, num_return_sequences],
        outputs=output
    )

demo.launch()
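# Optional: query the running app programmatically with gradio_client. A
# minimal sketch, assuming Gradio 4.x, where the API endpoint is auto-named
# after the handler function (here "/gradio_generate_text"); confirm the
# endpoint on the app's "Use via API" page before relying on it.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# answer = client.predict(
#     "vaccine covid-19 là gì?",  # prompt
#     64,                         # max_length
#     0.95,                       # top_p
#     0.7,                        # temperature
#     123,                        # seed
#     1,                          # num_return_sequences
#     api_name="/gradio_generate_text",
# )
# print(answer)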