Dionyssos committed on
Commit
a183432
·
1 Parent(s): c63ff96
app.py CHANGED
@@ -62,7 +62,7 @@ def audionar_tts(text=None,
62
  text = 'No Audio or Txt Input'
63
 
64
 
65
- print(lang, lang in language_names)
66
 
67
  if lang not in language_names: # StyleTTS2
68
 
@@ -126,26 +126,26 @@ def audionar_tts(text=None,
126
 
127
  # PAD
128
 
129
- len_speech = len(speech_audio)
130
  len_background = len(background_audio)
131
 
132
  if len_background > len_speech:
133
  padding = np.zeros(len_background - len_speech,
134
  dtype=np.float32)
135
- speech_audio = np.concatenate([speech_audio, padding])
136
  elif len_speech > len_background:
137
  padding = np.zeros(len_speech - len_background,
138
  dtype=np.float32)
139
  background_audio = np.concatenate([background_audio, padding])
140
 
141
 
142
- speech_audio = speech_audio[None, :]
143
  background_audio = background_audio[None, :]
144
 
145
 
146
  final_audio = np.concatenate([
147
- 0.49 * speech_audio + 0.51 * background_audio,
148
- 0.51 * background_audio + 0.49 * speech_audio
149
  ], 0)
150
 
151
  else:
@@ -155,7 +155,7 @@ def audionar_tts(text=None,
155
 
156
  wavfile = '_vits_.wav'
157
  audiofile.write(wavfile, final_audio, 16000)
158
- return wavfile, wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
159
 
160
 
161
 
@@ -495,46 +495,33 @@ VOICES = [t[:-4] for t in VOICES] # crop .wav for visuals in gr.DropDown
495
 
496
  _tts = StyleTTS2().to('cpu')
497
 
498
-
499
- with gr.Blocks(theme='huggingface') as demo:
500
- with gr.Tab(label="TTS"):
501
- with gr.Row():
502
- text_input = gr.Textbox(
503
- label="Type text for TTS:",
504
- placeholder="Type Text for TTS",
505
- lines=4,
506
- value="Farover the misty mountains cold too dungeons deep and caverns old.",
507
- )
508
- choice_dropdown = gr.Dropdown(
509
- choices=language_names + VOICES,
510
- label="Select Voice or Language",
511
- value=VOICES[0]
512
- )
513
- soundscape_input = gr.Textbox(
514
- lines=1,
515
- value="frogs",
516
- label="AudioGen Txt"
517
- )
518
- kv_input = gr.Number(
519
- label="kv Period",
520
- value=24,
521
- )
522
- generate_button = gr.Button("Generate Audio", variant="primary")
523
-
524
- output_audio = gr.Audio(label="TTS Output")
525
-
526
- generate_button.click(
527
- fn=audionar_tts,
528
- inputs=[text_input, choice_dropdown, soundscape_input, kv_input],
529
- outputs=[output_audio]
530
  )
 
 
 
 
 
531
 
532
- with gr.Tab(label="API"):
533
- with gr.Row():
534
- with gr.Column():
535
-
536
-
537
- gr.Markdown("Only the first two seconds of the audio will be processed.")
538
-
539
-
540
- demo.launch(debug=True)
 
62
  text = 'No Audio or Txt Input'
63
 
64
 
65
+
66
 
67
  if lang not in language_names: # StyleTTS2
68
 
 
126
 
127
  # PAD
128
 
129
+ len_speech = len(x)
130
  len_background = len(background_audio)
131
 
132
  if len_background > len_speech:
133
  padding = np.zeros(len_background - len_speech,
134
  dtype=np.float32)
135
+ x = np.concatenate([x, padding])
136
  elif len_speech > len_background:
137
  padding = np.zeros(len_speech - len_background,
138
  dtype=np.float32)
139
  background_audio = np.concatenate([background_audio, padding])
140
 
141
 
142
+ x = x[None, :]
143
  background_audio = background_audio[None, :]
144
 
145
 
146
  final_audio = np.concatenate([
147
+ 0.49 * x + 0.51 * background_audio,
148
+ 0.51 * background_audio + 0.49 * x
149
  ], 0)
150
 
151
  else:
 
155
 
156
  wavfile = '_vits_.wav'
157
  audiofile.write(wavfile, final_audio, 16000)
158
+ return wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
159
 
160
 
161
 
 
495
 
496
  _tts = StyleTTS2().to('cpu')
497
 
498
+ demo = gr.Interface(
499
+ fn=audionar_tts,
500
+ inputs=[
501
+ gr.Textbox(
502
+ label="Type text for TTS:",
503
+ placeholder="Type Text for TTS",
504
+ lines=4,
505
+ value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.',
506
+ ),
507
+ gr.Dropdown(
508
+ choices=language_names + VOICES,
509
+ label="TTS lang",
510
+ value=language_names[0]
511
+ ),
512
+ gr.Textbox(
513
+ lines=1,
514
+ value="frogs",
515
+ label="AudioGen Txt"
516
+ ),
517
+ gr.Number(
518
+ label="kv cache",
519
+ value=24,
 
 
 
 
 
 
 
 
 
 
520
  )
521
+ ],
522
+ outputs=gr.Audio(label="TTS Output"),
523
+ title="TTS with a Simple Interface",
524
+ theme="huggingface"
525
+ )
526
 
527
+ demo.launch()
 
 
 
 
 
 
 
 
female-20-happy.wav DELETED
Binary file (51 kB)
 
female-46-neutral.wav DELETED
Binary file (37.6 kB)
 
male-27-sad.wav DELETED
Binary file (50.4 kB)
 
male-60-angry.wav DELETED
Binary file (60.5 kB)
 
requirements.txt CHANGED
@@ -3,7 +3,6 @@ nltk
3
  librosa
4
  phonemizer
5
  audiofile
6
- matplotlib
7
  audresample
8
  num2words
9
  numpy<2.0.0
 
3
  librosa
4
  phonemizer
5
  audiofile
 
6
  audresample
7
  num2words
8
  numpy<2.0.0