"""
app.py

This Gradio app loads your fine-tuned model and serves as a therapeutic chatbot named "Serenity".
It uses a system prompt to steer the conversation in a supportive, open-ended manner.
"""

import gradio as gr
import torch
from transformers import TextStreamer
from unsloth import FastLanguageModel

# ---------------------------
# 1. Load your fine-tuned model
# ---------------------------
# Hugging Face Hub repository to load; replace the placeholder with your own.
model_name = "YOUR_USERNAME/YOUR_MODEL_REPO"

# Loader settings, forwarded unchanged to FastLanguageModel.from_pretrained below.
dtype = None           # None lets the loader auto-detect the dtype
load_in_4bit = True    # keep True if the model was fine-tuned with 4-bit quantization
max_seq_length = 2048  # adjust as needed

# Loading happens at import time and yields both the model and its tokenizer,
# which respond() reads as module-level globals.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    max_seq_length=max_seq_length,
)

# Called for its effect on `model`; the return value is not used.
FastLanguageModel.for_inference(model)

# ---------------------------
# 2. Define the therapeutic system prompt
# ---------------------------
# Used as the content of the "system" message that respond() places at the
# start of every request. The literal begins and ends with a newline, and both
# are part of the text handed to the chat template.
# NOTE(review): rule 5 asks the model to support and validate when serious
# issues emerge but gives no escalation guidance (e.g. pointing the user to
# crisis resources) — confirm this is intended for a therapy-style assistant.
therapy_system_prompt = """
You are "Serenity", a compassionate, supportive, and curious Therapist. Your role is to:
1. **Validate First**: Start by validating emotions.
2. **Explore Gently**: Always ask open-ended questions using "What" or "How".
3. **Encourage Elaboration**: Make sure to ask for more details.
4. **Avoid Closure**: Never end with statements - always end with a question.
5. **Support Safety**: If serious issues emerge, support them as best as possible and validate their feelings.
"""

# ---------------------------
# 3. Define the response generation function
# ---------------------------
def respond(message, chat_history):
    """
    Generate Serenity's reply to a new user message and record the exchange.

    The system prompt, the prior turns and the new message are rendered with the
    tokenizer's chat template, the model samples a continuation, and only the
    newly generated tokens are decoded as the reply.

    Parameters:
      message (str): The latest message from the user. Empty or whitespace-only
        input is ignored and no generation is run.
      chat_history (list): List of (user_message, assistant_response) tuples.
        The new exchange is appended to this list in place.

    Returns:
      A tuple with an empty string (clearing the input) and the updated chat history.
    """
    # Nothing to answer: leave the conversation untouched rather than sending
    # an empty user turn to the model.
    if not message or not message.strip():
        return "", chat_history

    # System prompt first, then the prior turns in order, then the new message.
    # NOTE(review): the prompt grows with every turn and is never trimmed —
    # confirm long conversations stay within the model's context window.
    messages = [{"role": "system", "content": therapy_system_prompt}]
    for user_msg, bot_resp in chat_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_resp})
    messages.append({"role": "user", "content": message})

    # return_dict=True yields the attention mask alongside the input ids, so
    # generate() does not have to guess it (pad and eos share one id below).
    # The tensors are moved to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[-1]

    # Generate the response
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=256,
        temperature=0.85,
        repetition_penalty=1.2,
        top_p=0.90,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; decode only what
    # follows them. Splitting the decoded text on the word "assistant" would
    # truncate any reply that contains that word and depends on the template
    # emitting it at all.
    reply_ids = outputs[0][prompt_length:]
    therapy_response = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

    # Append in place: the UI wiring in this file passes the history in through
    # gr.State but does not list that state as an output, so this mutation is
    # what carries the conversation over to the next turn.
    chat_history.append((message, therapy_response))

    return "", chat_history

# ---------------------------
# 4. Build the Gradio Interface
# ---------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal")) as demo:
    # Page heading.
    gr.Markdown("""
    # 🌿 Serenity - AI Therapist
    *A safe space for emotional support and reflection*
    """)

    # Conversation display, followed by the text input.
    chatbot = gr.Chatbot(avatar_images=("user.png", "therapist.png"), height=450)
    msg = gr.Textbox(placeholder="Type your message...", label="Share your feelings")

    # Action buttons laid out in a single row.
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear History")

    # Conversation history as a list of (user, assistant) tuples.
    # It is an input to respond() only; respond() extends the list in place.
    chat_state = gr.State([])

    # Clicking "Send" and submitting the textbox are wired identically:
    # respond() receives the message and the history, and its two return
    # values go to the textbox (cleared) and the chatbot display.
    submit_btn.click(
        fn=respond,
        inputs=[msg, chat_state],
        outputs=[msg, chatbot],
        queue=False,
    )

    msg.submit(
        fn=respond,
        inputs=[msg, chat_state],
        outputs=[msg, chatbot],
        queue=False,
    )

    # Clearing is two chained steps: reset the stored history to an empty
    # list, then blank the chatbot display.
    clear_btn.click(
        fn=lambda: [],
        inputs=None,
        outputs=chat_state,
        queue=False,
    ).then(
        fn=lambda: None,
        inputs=None,
        outputs=chatbot,
        queue=False,
    )

# ---------------------------
# 5. Launch the app
# ---------------------------
# Runs at import time: there is no `__main__` guard around this call.
# NOTE(review): share=True asks Gradio for a public share link (see the
# Gradio launch() documentation) — confirm that is acceptable for a chat
# that handles sensitive personal conversations.
demo.launch(share=True, debug=False)