import os
import gradio as gr
import torch
import tempfile
import asyncio
import edge_tts
import spaces
from pydub import AudioSegment
from threading import Thread
from collections.abc import Iterator
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Markdown text handed to gr.ChatInterface as the UI description.
DESCRIPTION = """
# QwQ Tiny with Edge TTS (MP3 Output)
"""

# Upper bound and initial value of the "Max new tokens" slider in the UI.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
# Prompts longer than this many tokens are truncated to their most recent
# tokens in generate(). Overridable through the environment variable of the
# same name; defaults to 4096.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# First CUDA device when one is available, otherwise the CPU.
# NOTE(review): this name is not referenced in the visible part of the file --
# the model is placed via device_map="auto" and inputs are moved to
# model.device -- confirm whether it is still needed.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Tokenizer and model weights are loaded once, at import time.
model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # let the loader decide device placement
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
)
# Put the model in evaluation mode; this script only runs generation.
model.eval()

async def text_to_speech(text: str) -> str:
    """Synthesize *text* with Edge TTS and return the path of the MP3 file.

    edge-tts' ``Communicate.save`` writes MP3-encoded audio, so the stream is
    saved straight into a ``.mp3`` temporary file. Storing it under a ``.wav``
    name and re-encoding it through a WAV parser is unnecessary and fails when
    the payload is not actually WAV.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is created
        with ``delete=False``; the caller owns it and is responsible for
        removing it once it is no longer needed.

    Raises:
        Whatever ``edge_tts.Communicate(...).save`` raises (for example on
        network failure). The temporary file is removed before the exception
        propagates.
    """
    # Only reserve a unique path here; the handle is closed immediately so
    # that edge-tts can reopen the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_mp3:
        mp3_path = tmp_mp3.name

    try:
        communicate = edge_tts.Communicate(text)
        await communicate.save(mp3_path)
    except BaseException:
        # Covers cancellation as well: never leave an empty or partial temp
        # file behind when synthesis does not complete.
        try:
            os.remove(mp3_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return mp3_path
    
def _split_command(message: str) -> tuple[str, bool]:
    """Strip a leading ``@tts`` / ``@text`` tag and report whether TTS was requested.

    Only the tag at the very start of the (whitespace-stripped) message is
    removed; later occurrences of the same text inside the prompt are kept.
    A message without a tag is returned unchanged.
    """
    stripped = message.strip()
    for tag in ("@tts", "@text"):
        if stripped.startswith(tag):
            return stripped[len(tag):].strip(), tag == "@tts"
    return message, False


@spaces.GPU
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str] | str:
    """Generate a reply to *message*, optionally converting it to speech.

    A message starting with ``@tts`` asks for the reply to be synthesized to
    an MP3 file; a message starting with ``@text`` (or with no tag) gets the
    reply as plain text.

    Args:
        message: The latest user message, possibly prefixed with a tag.
        chat_history: Earlier conversation turns as role/content dicts; they
            are passed to the tokenizer's chat template ahead of *message*.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The generated text, or -- for a ``@tts`` request that produced text --
        the path of the MP3 file returned by ``text_to_speech``.
    """
    message, is_tts = _split_command(message)

    conversation = [*chat_history, {"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")

    # Keep only the most recent tokens when the prompt exceeds the limit.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")

    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }
    # model.generate runs in a worker thread and feeds decoded text into the
    # streamer, which is drained here.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    final_output = "".join(streamer)
    # The streamer is exhausted, so generation is finishing; wait for the
    # worker so no generation thread outlives the request.
    worker.join()

    if not is_tts:
        return final_output

    if not final_output.strip():
        # Nothing to speak: skip the TTS service call instead of sending it
        # empty text.
        gr.Warning("The model produced no text to convert to speech.")
        return final_output

    # asyncio.run creates a fresh event loop and closes it afterwards, so a
    # loop (and its file descriptors) is not leaked on every TTS request.
    # NOTE(review): the path is returned as a plain string; confirm that the
    # chat UI renders it as playable audio rather than as text.
    return asyncio.run(text_to_speech(final_output))

# Chat UI wired to generate(). The additional inputs are passed to generate()
# after (message, chat_history), in the order listed here:
# max_new_tokens, temperature, top_p, top_k, repetition_penalty.
demo = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
        gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
    ],
    # NOTE(review): presumably hides the stop button -- confirm against the
    # installed Gradio version.
    stop_btn=None,
    # Sample prompts; they also demonstrate the "@text" and "@tts" prefixes
    # that generate() recognizes.
    examples=[
        ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
        ["@text What is AI?"],
        ["@tts Explain Newton's third law of motion."],
        ["@text Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
    ],
    cache_examples=False,
    # History is exchanged as role/content dicts, matching how generate()
    # builds the conversation for the chat template.
    type="messages",
    description=DESCRIPTION,
    fill_height=True,
)

# Launch the app only when run as a script; at most 20 requests may wait in
# the queue.
if __name__ == "__main__":
    demo.queue(max_size=20).launch()