# pce / app.py (3.49 kB; page links: raw, history, blame)
# Last commit 366ecce by catiR: "force align tts, add voices"
import gradio as gr
import subprocess, os
import scripts.runSQ
#https://huggingface.co/spaces/clr/prosalign/blob/main/app.py
def _run_step(cmd, cwd=None):
    """Run one external setup command; return True if it exited with status 0.

    Captured stdout is always printed. On a non-zero exit status the captured
    stderr is printed as well, so a failing step is visible in the logs
    instead of being silently ignored.
    """
    result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    print(result.stdout)
    if result.returncode != 0:
        print(f'setup step {cmd!r} failed with status {result.returncode}:',
              result.stderr)
        return False
    return True


def setup():
    """Download the REAPER sources and build them in ``./REAPER/build``.

    Paths are resolved against the working directory at call time. Every
    external command is run with an explicit ``cwd`` rather than by changing
    the process-wide working directory, so the caller's working directory is
    untouched even if a step raises.

    The function is safe to call repeatedly: an existing build is detected
    and left alone, and an existing ``REAPER`` source tree is reused rather
    than downloaded again. If a download, unpack or build step fails, a
    diagnostic is printed and the remaining steps are skipped; no exception
    is raised for a non-zero exit status. A missing external tool still
    raises ``FileNotFoundError`` from ``subprocess.run``.

    Returns:
        None
    """
    root = os.getcwd()
    print('PWD::', root)

    source_dir = os.path.join(root, 'REAPER')
    build_dir = os.path.join(source_dir, 'build')
    archive = os.path.join(root, 'master.zip')
    extracted = os.path.join(root, 'REAPER-master')

    # NOTE(review): assumes the CMake build leaves an executable named
    # `reaper` directly in the build directory - confirm against REAPER's
    # build instructions.
    if os.path.isfile(os.path.join(build_dir, 'reaper')):
        print('REAPER already built; skipping download and build')
        return

    if not os.path.isdir(source_dir):
        try:
            # -O pins the output name so a stale master.zip is overwritten
            # instead of wget writing master.zip.1 next to it.
            fetched = (
                _run_step(
                    ["wget", "-O", "master.zip",
                     "https://github.com/google/REAPER/archive/refs/heads/master.zip"],
                    cwd=root,
                )
                and _run_step(["unzip", "./master.zip"], cwd=root)
            )
        finally:
            # The archive is only needed for unpacking; remove it on success
            # and on failure alike (a failed download can leave a stub file).
            if os.path.exists(archive):
                os.remove(archive)
        if not fetched or not os.path.isdir(extracted):
            print('REAPER setup aborted: could not download or unpack sources')
            return
        os.rename(extracted, source_dir)

    os.makedirs(build_dir, exist_ok=True)
    built = (
        _run_step(["cmake", ".."], cwd=build_dir)
        and _run_step(["make"], cwd=build_dir)
    )
    if not built:
        print('REAPER setup failed: the build did not complete')

    # Diagnostic listing of the starting directory, as in the original flow.
    listing = subprocess.run(["ls", "-la"], cwd=root,
                             capture_output=True, text=True)
    print('LS::', listing.stdout)
# Run the REAPER download/build at import time, before the UI is constructed.
print('about to setup')
setup()
def f1(voices, sent, indices):
    """Run the analysis for one voice and package the results for the UI.

    Args:
        voices: The single selected voice name; it is wrapped in a
            one-element list before being handed to ``scripts.runSQ.run``.
        sent: The selected sentence.
        indices: The word-selection text entered by the user (e.g. ``'1-3'``).

    Returns:
        An 8-tuple: the audio, a one-line score report string, then six
        figures in the order returned by ``scripts.runSQ.run``. The click
        handler in this file routes the first three figures to the "Pitch"
        tab and the last three to the "Energy" tab.
    """
    (
        audio,
        score,
        pitch_tts,
        pitch_mid,
        pitch_bad,
        energy_tts,
        energy_mid,
        energy_bad,
    ) = scripts.runSQ.run(sent, [voices], indices)

    rounded_score = round(score, 2)
    report = f'Difference from TTS to real speech: {rounded_score}'

    return (
        audio,
        report,
        pitch_tts,
        pitch_mid,
        pitch_bad,
        energy_tts,
        energy_mid,
        energy_bad,
    )
def label_indices(sentence):
    """Pair each word of a sentence with its 1-based position.

    The sentence is first passed through ``scripts.runSQ.snorm`` and then
    split on single spaces.

    Returns:
        A list of ``(text, label)`` tuples, one per word, where ``text`` is
        ``'<word> <position> '`` and ``label`` is the position as a string.
        This list is what the ``HighlightedText`` key in the UI displays.
    """
    words = scripts.runSQ.snorm(sentence).split(' ')
    labelled = []
    for position, word in enumerate(words, start=1):
        labelled.append((f'{word} {position} ', str(position)))
    return labelled
# Sentences offered in the dropdown, supplied by the project's runSQ module.
temp_sentences = scripts.runSQ.create_temp_sent_list()


def _plot_tab(title):
    """Create one tab holding a full-width plot above a row of two plots.

    Must be called inside an active ``gr.Tabs()`` context. Returns the three
    plots in creation order.
    """
    with gr.TabItem(title):
        top = gr.Plot()
        with gr.Row():
            left = gr.Plot()
            right = gr.Plot()
    return top, left, right


with gr.Blocks() as bl:
    voices = ['Alfur_v2', 'Dilja_v2', 'Alfur', 'Dilja', 'Bjartur', 'Rosa', 'Karl', 'Dora']

    # Sentence picker plus a read-only key showing each word with its index.
    temp_sentmenu = gr.Dropdown(temp_sentences, label="Sentence")
    # Every label "0".."332" gets the same highlight colour.
    key_colours = {str(i): "#dcfce7" for i in range(333)}
    marked_sentence = gr.HighlightedText(
        interactive=False,
        label="Word selection key",
        color_map=key_colours,
    )

    # Word-span entry and voice choice sit side by side.
    with gr.Row():
        spanselect = gr.Textbox(
            value='1-3',
            label="Select words",
            info='Enter the index of the word(s) to analyse, according to the key above. It can be a single word: 4 or a span of words separated by a dash: 2-3',
        )
        voiceselect = gr.Radio(voices, label="TTS voice", value='Alfur')

    temp_button = gr.Button(value="Run with selected options")

    # Outputs: synthesised audio, score line, and two tabs of three plots.
    tts_output = gr.Audio(interactive=False)
    report_score = gr.Markdown('Difference from TTS to real speech:')

    with gr.Tabs():
        pl1, pl2, pl3 = _plot_tab("Pitch")
        pl4, pl5, pl6 = _plot_tab("Energy")

    # Refresh the word key whenever the user picks a sentence.
    temp_sentmenu.input(label_indices, temp_sentmenu, marked_sentence)

    # Run the analysis on demand and fan the results out to the outputs.
    run_inputs = [voiceselect, temp_sentmenu, spanselect]
    run_outputs = [tts_output, report_score, pl1, pl2, pl3, pl4, pl5, pl6]
    temp_button.click(f1, run_inputs, run_outputs)


if __name__ == "__main__":
    bl.launch()