# Kyutai TTS quick test (Jupyter/Kaggle notebook cell).
# ======================
# Setup
# ======================
!git clone https://github.com/kyutai-labs/delayed-streams-modeling.git
%cd delayed-streams-modeling

!pip install -q torch torchaudio gradio moshi

# ======================
# Import + Load Model
# ======================
import torch
import torchaudio
from moshi.models.loaders import CheckpointInfo
from moshi.models.tts import TTSModel

# Load Kyutai TTS model (English + French)
model_id = "kyutai/tts-1.6b-en_fr"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# moshi's TTSModel is not built with a `from_pretrained` constructor: the
# Hugging Face repo is first resolved to a CheckpointInfo, and the model is
# then instantiated from that description.
checkpoint_info = CheckpointInfo.from_hf_repo(model_id)
model = TTSModel.from_checkpoint_info(checkpoint_info, n_q=32, temp=0.6, device=device)

# Voice used when the caller does not choose one. The value is a path inside
# the voice repository that `model.get_voice_path` resolves.
DEFAULT_VOICE = "expresso/ex03-ex01_happy_001_channel1_334s.wav"

# ======================
# Simple test function
# ======================
def synthesize(text, lang="en", filename="out.wav", *, voice=DEFAULT_VOICE):
    """Synthesize ``text`` to speech and write it to ``filename``.

    Args:
        text: The sentence(s) to speak.
        lang: Kept so existing calls such as ``synthesize(text, "en", path)``
            keep working. It is not forwarded to the model, because the
            generation calls used here take no language argument.
            NOTE(review): presumably the spoken language follows the text
            and the chosen voice -- confirm against the model card.
        filename: Destination audio file, written with ``torchaudio.save``.
        voice: Voice identifier passed to ``model.get_voice_path``.

    Returns:
        ``filename``, unchanged, so the call can be chained into a player.
    """
    # One single-speaker turn; `prepare_script` accepts a list of turns.
    entries = model.prepare_script([text], padding_between=1)
    voice_path = model.get_voice_path(voice)
    condition_attributes = model.make_condition_attributes([voice_path], cfg_coef=2.0)

    # The generation API is batched: one script and one set of conditions.
    result = model.generate([entries], [condition_attributes])

    # Decode the generated audio-token frames back to PCM with the Mimi codec.
    # Frames before `delay_steps` are skipped, and channel 0 of each frame
    # (the text stream) is dropped before decoding.
    with model.mimi.streaming(1), torch.no_grad():
        chunks = [
            model.mimi.decode(frame[:, 1:, :])[0].cpu()
            for frame in result.frames[model.delay_steps:]
        ]

    # Each chunk is (channels, samples); join along time and keep it in range.
    waveform = torch.cat(chunks, dim=-1).clamp(-1.0, 1.0)

    # Use the codec's own sample rate rather than a hard-coded 16000, which
    # would make the file play back at the wrong speed and pitch.
    torchaudio.save(filename, waveform, model.mimi.sample_rate)
    return filename

# Smoke test: synthesize one sentence, then play the resulting file inline.
import IPython.display as ipd

demo_path = synthesize(
    "Hello, this is Kyutai TTS running on Kaggle!",
    lang="en",
    filename="demo.wav",
)
# Must stay the last expression of the cell so the notebook renders the player.
ipd.Audio(demo_path)