import gradio as gr
from transformers import AutoProcessor, VitsModel
import torch
import scipy.io.wavfile
import tempfile

# Hugging Face Hub id of Meta AI's MMS text-to-speech checkpoint for Fon.
# Kept in one place because the model and its processor are both loaded
# from this same id.
_MODEL_ID = "facebook/mms-tts-fon"

# Synthesis model (VITS) and the processor that turns text into model inputs.
model = VitsModel.from_pretrained(_MODEL_ID)
processor = AutoProcessor.from_pretrained(_MODEL_ID)

# Read the output sample rate from the model config instead of hard-coding it.
sampling_rate = model.config.sampling_rate

# Function to synthesize Fon audio from text
def tts_fon(text):
    """Synthesize Fon speech for ``text`` and return the path of a WAV file.

    Args:
        text: Fon text to speak.

    Returns:
        Path of a temporary ``.wav`` file containing the synthesized audio,
        written at ``sampling_rate``, or ``None`` when ``text`` is empty or
        contains only whitespace (there is nothing to synthesize).
    """
    # Guard against blank submissions so the tokenizer and model are never
    # run on an empty prompt; ``None`` signals "no audio" to the caller.
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        waveform = model(**inputs).waveform[0].numpy()

    # delete=False keeps the file on disk once the handle is closed, so the
    # returned path can still be read by the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        # Write through the already-open handle instead of reopening the
        # file by name while it is still open.
        scipy.io.wavfile.write(wav_file, rate=sampling_rate, data=waveform)
    return wav_file.name

# User-facing copy for the Gradio page. Both values are handed to
# gr.Interface below as its `title` and `description` arguments.
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"
# The description is written in Markdown syntax (link, headings, bold text,
# horizontal rules) and covers what the demo is, how to use it, and the
# usage and licensing notes.
description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""

# Gradio interface: one text box in, one audio player out.
iface = gr.Interface(
    fn=tts_fon,
    inputs=gr.Textbox(label="Enter Fon text here", placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.", lines=3),
    # type="filepath" matches tts_fon, which returns the path of a WAV file.
    outputs=gr.Audio(label="Synthesized Fon Speech", type="filepath"),
    title=title,
    description=description,
    theme="default",
)

# Start the app only when this file is executed as a script, so that merely
# importing the module (e.g. to reuse tts_fon) does not also launch the UI.
if __name__ == "__main__":
    iface.launch()