Spaces:

parler-tts
/

parler-tts-expresso

Running on Zero

App Files Files Community

sanchit-gandhi committed on May 15, 2024

Commit

e95e308

1 Parent(s): 2f42453

update examples

Browse files

Files changed (1) hide show

app.py +7 -8

app.py CHANGED Viewed

@@ -10,8 +10,7 @@ from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
-# TODO(SG): update to the latest checkpoint
-repo_id = "reach-vb/parler-tts-expresso-mistral-v0.1"
 model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -21,18 +20,18 @@ SAMPLE_RATE = feature_extractor.sampling_rate
 SEED = 42
 default_text = "*Remember* - this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of *five times*."
-default_description = "Thomas speaks with emphasis at a moderate pace with high quality."
 examples = [
     [
-        "Remember - this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of five times.",
-        "Thomas speaks sadly at a very slow pace with high quality."
     ],
     [
-        "Shhh! Did you know? You can reproduce this entire training recipe by following the steps outlined on the model card. It only takes one hour to train!",
-        "Talia whispers quickly with high quality audio.",
     ],
     [
-        "But that's no secret! The entire project is open-source first. We are releasing all datasets, training and inference code, so that you can use them yourself!",
         "Elisabeth speaks happily at a slightly slower than average pace with high quality audio.",
     ],
     [

 device = "cuda:0" if torch.cuda.is_available() else "cpu"
+repo_id = "parler-tts/parler-tts-mini-expresso"
 model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
 SEED = 42
 default_text = "*Remember* - this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of *five times*."
+default_description = "Thomas speaks with emphasis and excitement at a moderate pace with high quality."
 examples = [
     [
+        "Remember - this is only the first iteration of the model. To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of five times.",
+        "Thomas speaks in a sad tone at a very slow pace with high quality."
     ],
     [
+        "Did you know? <laugh> You can reproduce this entire training recipe by following the steps outlined on the model card!",
+        "Talia speaks quickly with laughter and high quality audio.",
     ],
     [
+        "But that's no secret! The entire project is open-source first, with all release artefacts on the Hub.",
         "Elisabeth speaks happily at a slightly slower than average pace with high quality audio.",
     ],
     [