"""Gradio front end comparing TTS output with real speech for selected words.

On import this module downloads and builds Google's REAPER tool into
``./REAPER`` (see ``setup``) and then constructs the Gradio Blocks UI.
"""
# Reference: https://huggingface.co/spaces/clr/prosalign/blob/main/app.py

import os
import subprocess

import gradio as gr

import scripts.runSQ

REAPER_ZIP_URL = "https://github.com/google/REAPER/archive/refs/heads/master.zip"
REAPER_DIR = "REAPER"

# Upper bound on the word-index labels given a highlight colour in the key.
# NOTE(review): 333 is carried over unchanged; presumably "more words than any
# sentence will have" -- confirm.
MAX_HIGHLIGHT_LABELS = 333


def _run(cmd, cwd=None):
    """Run *cmd* (optionally in *cwd*), report a failure, return the result.

    Failures are reported rather than raised so that set-up stays
    best-effort, but the exit code and stderr are no longer lost.
    """
    result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f'Command {cmd} failed with exit code {result.returncode}:',
              result.stderr)
    return result


def setup():
    """Download (if needed) and build REAPER under ``./REAPER/build``.

    The download is skipped when ``./REAPER`` already exists, so calling this
    again does not nest a second checkout inside the first. The process
    working directory is never changed; build commands run with ``cwd=``.

    Raises:
        FileNotFoundError: if the REAPER sources are not present after the
            download step (e.g. the download or unzip failed).
    """
    print('PWD::', _run(["pwd"]).stdout)

    if not os.path.isdir(REAPER_DIR):
        r1 = _run(["wget", REAPER_ZIP_URL])
        print(r1.stdout)
        _run(["unzip", "./master.zip"])
        _run(["mv", "REAPER-master", REAPER_DIR])
        _run(["rm", "./master.zip"])

    if not os.path.isdir(REAPER_DIR):
        raise FileNotFoundError(
            f"REAPER sources not found at ./{REAPER_DIR}; "
            "download or extraction failed, cannot build"
        )

    build_dir = os.path.join(REAPER_DIR, "build")
    os.makedirs(build_dir, exist_ok=True)

    r2 = _run(["cmake", ".."], cwd=build_dir)
    print(r2.stdout)
    r3 = _run(["make"], cwd=build_dir)
    print(r3.stdout)

    print('LS::', _run(["ls", "-la"]).stdout)


print('about to setup')
setup()


def f1(voices, sent, indices):
    """Handle the "Run" button: analyse one sentence and format the score.

    Args:
        voices: value of the TTS voice selector.
        sent: value of the sentence dropdown.
        indices: value of the word-selection textbox (e.g. ``'1-3'``).

    Returns:
        A ``(tts_audio, score_report, graph)`` tuple, where ``tts_audio`` and
        ``graph`` are passed through from ``scripts.runSQ.run`` and
        ``score_report`` is the score rounded to two decimals in a sentence.
    """
    tts_audio, tts_score, graph = scripts.runSQ.run(sent, voices, indices)
    score_report = f'Difference from TTS to real speech: {round(tts_score,2)}'
    return (tts_audio, score_report, graph)


def label_indices(sentence):
    """Build the word-index key shown for the chosen sentence.

    The sentence is passed through ``scripts.runSQ.snorm`` and split on single
    spaces; each word is paired with its 1-based position.

    Returns:
        A list of ``(text, label)`` tuples, where ``text`` is
        ``'<word> <index> '`` and ``label`` is the index as a string.
    """
    words = scripts.runSQ.snorm(sentence).split(' ')
    return [(f'{word} {i+1} ', str(i+1)) for i, word in enumerate(words)]


bl = gr.Blocks()
with bl:
    temp_sentences = [
        'Litlaus græn hugmynd?',
        'Var það ekki nóg?',
        'Ef svo er hvað heita þau þá?',
        'Eru maríuhænur á Íslandi?',
    ]

    # Only these two voices currently return JSON speech marks. Speech marks
    # were supposedly also provided for Karl and Dora, but not even their wavs
    # come back; wavs do come back for all the other voices.
    voices = ['Alfur', 'Dilja']

    temp_sentmenu = gr.Dropdown(temp_sentences, label="Sentence")

    # Every possible index label gets the same highlight colour.
    marked_sentence = gr.HighlightedText(
        interactive=False,
        label="Word selection key",
        color_map={str(i): "#dcfce7" for i in range(MAX_HIGHLIGHT_LABELS)},
    )

    with gr.Row():
        spanselect = gr.Textbox(
            value='1-3',
            label="Select words",
            info='Enter the index of the word(s) to analyse, according to the key above. It can be a single word: 4 or a span of words separated by a dash: 2-3',
        )
        voiceselect = gr.Radio(voices, label="TTS voice", value='Alfur')

    temp_button = gr.Button(value="Run with selected options")

    tts_output = gr.Audio(interactive=False)
    report_score = gr.Markdown('Difference from TTS to real speech:')
    pl1 = gr.Plot()

    # Event wiring must happen inside the Blocks context.
    temp_sentmenu.input(label_indices, temp_sentmenu, marked_sentence)
    temp_button.click(
        f1,
        [voiceselect, temp_sentmenu, spanselect],
        [tts_output, report_score, pl1],
    )


if __name__ == "__main__":
    bl.launch()