Spaces:

clr
/

pce

Sleeping

pce / scripts /tapi.py

catiR

queries only sentences at least 10 speakers

5c7029b over 1 year ago

1.64 kB

	import json, os, requests, warnings, wave
	warnings.filterwarnings("ignore")



	# synthesise speech
	# save 16khz mono wav file
	# and word-level timestamps
	# return paths to wave and alignment files
	def tiro(text, voice, save='./', timeout=60):
	    """Synthesise `text` with the Tiro TTS web service and save the results.

	    Two requests are sent to the same endpoint: one asking for raw PCM audio
	    at a sample rate of 16000, and one asking for word-level speech marks
	    as JSON. The audio is written as a mono, 16-bit, 16 kHz WAV file and the
	    speech marks as a JSON file.

	    Args:
	        text: The text to synthesise (sent with LanguageCode "is-IS").
	        voice: Voice identifier sent as "VoiceId"; also used as the base
	            name of both output files.
	        save: Prefix prepended verbatim to the output file names. It is
	            concatenated, not path-joined, so a directory must end with a
	            path separator (the default is './').
	        timeout: Seconds passed to `requests.post` as its `timeout`
	            argument; `None` waits indefinitely.

	    Returns:
	        A tuple `(wav_path, alignment_path)` of absolute paths.

	    Raises:
	        requests.HTTPError: If either request returns an error status.
	        requests.RequestException: On connection failure or timeout.
	    """
	    # endpoint working 2023
	    url = 'https://tts.tiro.is/v0/speech'
	    headers = {'Content-Type': 'application/json'}

	    # Fields shared by the synthesis and the alignment request.
	    common = {
	        "Engine": "standard",
	        "LanguageCode": "is-IS",
	        "Text": text,
	        "VoiceId": voice,
	    }

	    # synthesis
	    payload_tts = {**common, "OutputFormat": "pcm", "SampleRate": "16000"}

	    # word time alignments
	    payload_aln = {**common, "OutputFormat": "json", "SpeechMarkTypes": ["word"]}

	    # NOTE(review): verify=False disables TLS certificate verification. It is
	    # kept exactly as before; confirm whether the endpoint still requires it.
	    tts_data = requests.post(url, headers=headers, json=payload_tts,
	                             verify=False, timeout=timeout)
	    # Fail loudly instead of writing an error body into the WAV file.
	    tts_data.raise_for_status()
	    aln_data = requests.post(url, headers=headers, json=payload_aln,
	                             verify=False, timeout=timeout)
	    aln_data.raise_for_status()

	    # Files are named after the voice only, so a later call with the same
	    # `save` and `voice` overwrites the earlier output.
	    wname = save + voice + '.wav'
	    aname = save + voice + '.json'

	    with wave.open(wname, 'wb') as f:
	        f.setnchannels(1)
	        f.setframerate(16000)
	        f.setsampwidth(2)
	        f.writeframes(tts_data.content)

	    # The response is wrapped into a single {"alignments": [...]} document;
	    # commas are inserted wherever one object ends and the next begins on
	    # the following line (presumably the service returns one JSON object
	    # per line -- confirm against the API).
	    # Explicit UTF-8 on both decode and write keeps non-ASCII text intact
	    # regardless of the platform's default encoding.
	    with open(aname, 'w', encoding='utf-8') as f:
	        f.write('{"alignments": [')
	        f.write(aln_data.content.decode('utf-8').replace('}\n{', '},\n {'))
	        f.write(']}')

	    return (os.path.abspath(wname), os.path.abspath(aname))




	#sentence = "Hæ hæ hæ hæ! Ég heiti Gervimaður Finnland, en þú?"
	#voice = "Alfur"

	#wf, af = tiro(sentence,voice)

	#print(wf, af)