Spaces:
Running
Running
audionar
Browse files
app.py
CHANGED
@@ -11,29 +11,16 @@ from transformers import Wav2Vec2Processor
|
|
11 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
|
12 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
|
13 |
import audiofile
|
14 |
-
import unicodedata
|
15 |
-
import textwrap
|
16 |
from tts import StyleTTS2
|
17 |
import audresample
|
18 |
-
|
19 |
-
# --
|
20 |
-
# -*- coding: utf-8 -*-
|
21 |
-
|
22 |
-
# https://huggingface.co/spaces/dpc/mmstts/tree/main
|
23 |
-
# https://huggingface.co/spaces/mms-meta/MMS/blob/main/tts.py
|
24 |
-
|
25 |
import json
|
26 |
-
import soundfile
|
27 |
import re
|
28 |
import unicodedata
|
29 |
-
import gradio as gr
|
30 |
import textwrap
|
31 |
-
import numpy as np
|
32 |
-
import torch
|
33 |
import nltk
|
34 |
from num2words import num2words
|
35 |
from num2word_greek.numbers2words import convert_numbers
|
36 |
-
from
|
37 |
|
38 |
nltk.download('punkt', download_dir='./')
|
39 |
nltk.download('punkt_tab', download_dir='./')
|
@@ -534,14 +521,14 @@ def audionar_tts(text=None,
|
|
534 |
lang_code=lang_code,
|
535 |
)[0, :]
|
536 |
total_audio.append(x)
|
537 |
-
|
538 |
print(f'\n\n_______________________________ {_t} {x.shape=}')
|
539 |
|
540 |
x = torch.cat(total_audio).cpu().numpy()
|
541 |
-
|
542 |
tmp_file = f'_speech.wav'
|
543 |
-
|
544 |
-
|
545 |
|
546 |
return tmp_file
|
547 |
|
|
|
11 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2Model
|
12 |
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2PreTrainedModel
|
13 |
import audiofile
|
|
|
|
|
14 |
from tts import StyleTTS2
|
15 |
import audresample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
import json
|
|
|
17 |
import re
|
18 |
import unicodedata
|
|
|
19 |
import textwrap
|
|
|
|
|
20 |
import nltk
|
21 |
from num2words import num2words
|
22 |
from num2word_greek.numbers2words import convert_numbers
|
23 |
+
from audionar import VitsModel, VitsTokenizer
|
24 |
|
25 |
nltk.download('punkt', download_dir='./')
|
26 |
nltk.download('punkt_tab', download_dir='./')
|
|
|
521 |
lang_code=lang_code,
|
522 |
)[0, :]
|
523 |
total_audio.append(x)
|
524 |
+
|
525 |
print(f'\n\n_______________________________ {_t} {x.shape=}')
|
526 |
|
527 |
x = torch.cat(total_audio).cpu().numpy()
|
528 |
+
|
529 |
tmp_file = f'_speech.wav'
|
530 |
+
|
531 |
+
audiofile.write(tmp_file, x, 16000)
|
532 |
|
533 |
return tmp_file
|
534 |
|