ky / app.py
harpalsinh8's picture
Update app.py
f441ec8 verified
raw
history blame contribute delete
925 Bytes
# ======================
# Setup
# ======================
# Clone the kyutai-labs/delayed-streams-modeling repository from GitHub and
# make the checkout the current working directory.
# NOTE: `!` (shell escape) and `%cd` (magic) are IPython/Jupyter syntax, so
# these lines only run inside a notebook/IPython session, not plain `python`.
!git clone https://github.com/kyutai-labs/delayed-streams-modeling.git
%cd delayed-streams-modeling
# Quietly (-q) install torch, torchaudio, gradio and moshi with pip.
# NOTE(review): gradio is not used in the visible part of this file - confirm
# it is still needed.
!pip install -q torch torchaudio gradio moshi
# ======================
# Import + Load Model
# ======================
import torch
import torchaudio
from moshi.models import TTSModel
# Identifier of the Kyutai TTS checkpoint (English + French).
model_id = "kyutai/tts-1.6b-en_fr"

# Load the model once at module level; use the GPU when CUDA is available and
# fall back to the CPU otherwise.
# NOTE(review): confirm that moshi's TTSModel exposes `from_pretrained` with
# this signature for the installed moshi version.
model = TTSModel.from_pretrained(
    model_id,
    device="cuda" if torch.cuda.is_available() else "cpu",
)
# ======================
# Simple test function
# ======================
def synthesize(
    text: str,
    lang: str = "en",
    filename: str = "out.wav",
    sample_rate: int = 16000,
) -> str:
    """Synthesize ``text`` to speech and write the result to an audio file.

    Args:
        text: Text to speak; passed to ``model.generate`` as-is.
        lang: Language code forwarded to ``model.generate`` as ``language``.
        filename: Destination path handed to ``torchaudio.save``.
        sample_rate: Sample rate in Hz written to the file. Defaults to
            16000, the value that used to be hard-coded here.
            NOTE(review): confirm this matches the rate the model actually
            produces; a mismatch changes playback speed and pitch.

    Returns:
        ``filename``, unchanged, so the result can be passed straight to a
        player.
    """
    # Inference only: no need to record autograd state while generating.
    with torch.no_grad():
        audio = model.generate(text, language=lang)

    # Presumably a torch.Tensor (the original code called ``.cpu()`` on it);
    # detach so a tensor that still tracks gradients can be written out.
    waveform = audio.detach().cpu()

    # torchaudio.save requires a 2-D (channels, time) tensor; promote a bare
    # 1-D waveform to a single channel instead of failing.
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)

    torchaudio.save(filename, waveform, sample_rate)
    return filename
# Smoke test: synthesize one English sentence into demo.wav, then show an
# inline audio player for that file as the cell output.
import IPython.display as ipd

demo_wav = synthesize(
    text="Hello, this is Kyutai TTS running on Kaggle!",
    lang="en",
    filename="demo.wav",
)
ipd.Audio(demo_wav)