"""Gradio demo that synthesizes Fon speech with the `facebook/mms-tts-fon` model."""

import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
from transformers import AutoProcessor, VitsModel

# Load the Fon TTS model from Meta AI once at start-up so that every request
# reuses the same weights instead of reloading them per call.
model = VitsModel.from_pretrained("facebook/mms-tts-fon")
processor = AutoProcessor.from_pretrained("facebook/mms-tts-fon")

# Take the sampling rate from the model config rather than hard-coding it, so
# the WAV header always matches the audio the model actually produces.
sampling_rate = model.config.sampling_rate


def tts_fon(text: str) -> str:
    """Synthesize Fon speech for *text* and return the path to a WAV file.

    Args:
        text: The text to synthesize, as typed into the Gradio textbox.

    Returns:
        Filesystem path of a newly created temporary ``.wav`` file holding the
        synthesized audio, written at ``sampling_rate``.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Reject blank submissions up front with a message the UI can display,
    # instead of handing an empty sequence to the tokenizer and model.
    if not text or not text.strip():
        raise gr.Error("Please enter some Fon text to synthesize.")

    inputs = processor(text, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients are needed
        audio = model(**inputs).waveform[0].numpy()

    # Reserve a unique path and close the handle BEFORE writing: re-opening a
    # NamedTemporaryFile by name while it is still open fails on Windows.
    # delete=False keeps the file on disk so Gradio can serve it after this
    # function returns; nothing in this script removes it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path


# Title and detailed description (the description is rendered as Markdown, so
# the blank lines and line breaks below are significant).
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"

description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""

# Gradio interface: one textbox in, one audio player out. type="filepath"
# matches the path string returned by tts_fon.
iface = gr.Interface(
    fn=tts_fon,
    inputs=gr.Textbox(
        label="Enter Fon text here",
        placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
        lines=3,
    ),
    outputs=gr.Audio(label="Synthesized Fon Speech", type="filepath"),
    title=title,
    description=description,
    theme="default",
)

# Only start the web server when executed as a script, so importing this
# module (e.g. to reuse tts_fon) does not launch the app as a side effect.
if __name__ == "__main__":
    iface.launch()