Spaces:

clr
/

pce

Sleeping

pce / app.py

catiR

force align tts, add voices

366ecce over 1 year ago

3.49 kB

	import gradio as gr
	import subprocess, os
	import scripts.runSQ


	#https://huggingface.co/spaces/clr/prosalign/blob/main/app.py


	def _run_logged(cmd, label=None, cwd=None):
	    """Run *cmd*, print what it wrote, and return the ``CompletedProcess``.

	    Args:
	        cmd: Command and its arguments as a list (no shell is involved).
	        label: Optional prefix printed in front of the captured stdout.
	        cwd: Directory to run the command in; ``None`` keeps the current one.

	    stdout is always printed. stderr is printed only when the command exits
	    with a non-zero status, so a failed download or build leaves a trace in
	    the log instead of being captured and thrown away.
	    """
	    result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
	    if label is None:
	        print(result.stdout)
	    else:
	        print(label, result.stdout)
	    if result.returncode != 0:
	        print(f'{cmd[0]} exited with status {result.returncode}:', result.stderr)
	    return result


	def setup():
	    """Download the google/REAPER sources and build them in ``./REAPER/build``.

	    Runs ``wget``/``unzip``/``mv``/``rm`` to fetch and unpack the master
	    branch archive, then ``cmake ..`` and ``make`` inside ``REAPER/build``.
	    The working directory of the process is never changed: the build
	    commands are given ``cwd`` instead, so an exception part-way through
	    cannot leave the app running from inside the REAPER tree.

	    Raises:
	        FileNotFoundError: if one of the external tools is not installed, or
	            if ``REAPER`` does not exist after the download step.
	    """
	    _run_logged(["pwd"], label='PWD::')

	    # Skip the download when a checkout is already present; otherwise
	    # ``mv REAPER-master REAPER`` would move the fresh tree *inside* the
	    # existing directory and the old sources would be built again.
	    if not os.path.isdir('REAPER'):
	        _run_logged(["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"])
	        subprocess.run(["unzip", "./master.zip"])
	        subprocess.run(["mv", "REAPER-master", "REAPER"])
	        subprocess.run(["rm", "./master.zip"])

	    build_dir = os.path.join('REAPER', 'build')
	    # os.mkdir (not makedirs) on purpose: a missing REAPER directory means
	    # the download failed and must still stop start-up with an error.
	    if not os.path.isdir(build_dir):
	        os.mkdir(build_dir)
	    _run_logged(["cmake", ".."], cwd=build_dir)
	    _run_logged(["make"], cwd=build_dir)

	    _run_logged(["ls", "-la"], label='LS::')


	# Fetch and build REAPER as soon as this module is loaded: the call sits at
	# module level, outside the ``__main__`` guard, so it also runs on import.
	print('about to setup')
	setup()


	def f1(voices, sent, indices):
	    """Run the analysis for one voice and shape the result for the UI.

	    Args:
	        voices: A single voice value; it is wrapped in a one-element list
	            before being handed to ``scripts.runSQ.run``.
	        sent: The sentence to analyse.
	        indices: The word selection, passed through unchanged.

	    Returns:
	        An 8-tuple: the audio, a text line reporting the score rounded to
	        two decimals, then the six figures in the order ``run`` gave them.
	    """
	    (
	        audio,
	        score,
	        fig_p_tts,
	        fig_p_mid,
	        fig_p_bad,
	        fig_e_tts,
	        fig_e_mid,
	        fig_e_bad,
	    ) = scripts.runSQ.run(sent, [voices], indices)

	    rounded_score = round(score, 2)
	    score_report = f'Difference from TTS to real speech: {rounded_score}'

	    return (
	        audio,
	        score_report,
	        fig_p_tts,
	        fig_p_mid,
	        fig_p_bad,
	        fig_e_tts,
	        fig_e_mid,
	        fig_e_bad,
	    )


	def label_indices(sentence):
	    """Pair every word of *sentence* with its 1-based position.

	    The sentence is first passed through ``scripts.runSQ.snorm`` and then
	    split on single spaces.

	    Returns:
	        A list of ``(text, label)`` tuples, where ``text`` is
	        ``'<word> <n> '`` and ``label`` is ``'<n>'``.
	    """
	    words = scripts.runSQ.snorm(sentence).split(' ')
	    labelled = []
	    for position, word in enumerate(words, start=1):
	        tag = str(position)
	        labelled.append((f'{word} {position} ', tag))
	    return labelled



	# Sentences offered in the "Sentence" dropdown below.
	temp_sentences = scripts.runSQ.create_temp_sent_list()

	# Build the Gradio UI.
	# NOTE(review): the indentation of this copy has been flattened, so the
	# nesting of the ``with`` blocks below cannot be read from here -- restore it
	# from the upstream file before running.
	bl = gr.Blocks()
	with bl:


	# Earlier hard-coded sentence list (Icelandic samples), kept for reference.
	#temp_sentences = ['Litlaus græn hugmynd?','Var það ekki nóg?', 'Ef svo er hvað heita þau þá?','Eru maríuhænur á Íslandi?']

	# Voice names offered in the "TTS voice" radio group.
	voices = ['Alfur_v2', 'Dilja_v2', 'Alfur','Dilja', 'Bjartur', 'Rosa', 'Karl', 'Dora']


	#with gr.Row():
	#with gr.Column(scale=4):
	temp_sentmenu = gr.Dropdown(temp_sentences, label="Sentence")
	#voiceselect = gr.CheckboxGroup(voices, label="TTS voice",value='Alfur')

	# Read-only key showing each word with its index; every label "0".."332"
	# is mapped to the same highlight colour.
	marked_sentence = gr.HighlightedText(interactive=False,label="Word selection key",color_map = {str(i):"#dcfce7" for i in range(333)})

	with gr.Row():
	# Free-text word selection (defaults to '1-3') and the single-choice voice picker.
	spanselect = gr.Textbox(value='1-3',label="Select words",info='Enter the index of the word(s) to analyse, according to the key above. It can be a single word: 4 or a span of words separated by a dash: 2-3')
	voiceselect = gr.Radio(voices, label="TTS voice",value='Alfur')

	#with gr.Column(scale=1):
	temp_button = gr.Button(value="Run with selected options")


	# Outputs: the audio player and the score line filled in by f1.
	tts_output = gr.Audio(interactive=False)
	report_score = gr.Markdown('Difference from TTS to real speech:')

	# Two tabs with three plots each; pl1..pl6 receive the six figures
	# returned by f1, in that order.
	with gr.Tabs():
	with gr.TabItem("Pitch"):

	pl1 = gr.Plot()
	with gr.Row():
	pl2 = gr.Plot()
	pl3 = gr.Plot()

	with gr.TabItem("Energy"):

	pl4 = gr.Plot()
	with gr.Row():
	pl5 = gr.Plot()
	pl6 = gr.Plot()





	# Input on the sentence dropdown rebuilds the numbered word key.
	temp_sentmenu.input(label_indices,temp_sentmenu,marked_sentence)
	# The button runs f1 on (voice, sentence, word selection) and spreads its
	# eight return values over the audio player, the score line and the plots.
	temp_button.click(f1,[voiceselect,temp_sentmenu,spanselect],[tts_output,report_score,pl1,pl2,pl3,pl4,pl5,pl6])


	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	bl.launch()