pce / scripts /tapi.py
catiR
queries only sentences at least 10 speakers
5c7029b
raw
history blame
1.64 kB
import json, os, requests, warnings, wave
warnings.filterwarnings("ignore")
# Synthesise speech with the Tiro TTS service,
# save it as a 16 kHz mono WAV file
# together with word-level timestamps,
# and return the paths to the WAV and alignment files.
def tiro(text, voice, save='./', timeout=60):
    """Synthesise ``text`` with the Tiro TTS service and save the results.

    Two requests are posted to the same endpoint: one for raw PCM audio and
    one for word-level speech marks. The audio is wrapped in a 16 kHz, mono,
    2-byte-per-sample WAV container and the speech marks are stored as
    ``{"alignments": [...]}`` in a JSON file. Both files are named after
    ``voice``, so a later call with the same voice and directory overwrites
    the earlier output.

    Args:
        text: Text to synthesise.
        voice: Value sent as ``VoiceId``; also used as the base file name.
        save: Directory the two files are written to. It is not created
            here, so it must already exist.
        timeout: Seconds to wait on each request before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        Tuple ``(wav_path, alignment_path)`` of absolute paths.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the speech-mark response cannot be decoded as JSON.
    """
    # endpoint working 2023
    url = 'https://tts.tiro.is/v0/speech'
    headers = {'Content-Type': 'application/json'}
    # Single source of truth for the rate requested from the service and
    # the rate written into the WAV header.
    sample_rate = 16000

    common = {
        "Engine": "standard",
        "LanguageCode": "is-IS",
        "Text": text,
        "VoiceId": voice,
    }
    # synthesis
    payload_tts = dict(common, OutputFormat="pcm", SampleRate=str(sample_rate))
    # word time alignments
    payload_aln = dict(common, OutputFormat="json", SpeechMarkTypes=["word"])

    # NOTE(review): verify=False disables TLS certificate checking. It is
    # kept because the original relied on it; confirm whether the endpoint
    # still needs it and remove it if not.
    tts_data = requests.post(url, headers=headers, json=payload_tts,
                             verify=False, timeout=timeout)
    # Fail before touching the disk so an HTTP error body is never written
    # out as if it were audio or alignment data.
    tts_data.raise_for_status()
    aln_data = requests.post(url, headers=headers, json=payload_aln,
                             verify=False, timeout=timeout)
    aln_data.raise_for_status()

    alignments = _parse_speech_marks(aln_data.content.decode())

    # os.path.join tolerates a directory given without a trailing separator.
    wname = os.path.join(save, voice + '.wav')
    aname = os.path.join(save, voice + '.json')

    with wave.open(wname, 'wb') as f:
        f.setnchannels(1)
        f.setframerate(sample_rate)
        # Assumes the service returns 2-byte PCM samples - TODO confirm.
        f.setsampwidth(2)
        f.writeframes(tts_data.content)

    # Explicit UTF-8 so Icelandic characters do not depend on the locale.
    with open(aname, 'w', encoding='utf-8') as f:
        json.dump({"alignments": alignments}, f, ensure_ascii=False)

    return os.path.abspath(wname), os.path.abspath(aname)


def _parse_speech_marks(raw):
    """Parse a string of whitespace-separated JSON values into a list."""
    decoder = json.JSONDecoder()
    marks = []
    pos = 0
    end = len(raw)
    while True:
        # raw_decode rejects leading whitespace, so skip separators first.
        while pos < end and raw[pos].isspace():
            pos += 1
        if pos >= end:
            return marks
        mark, pos = decoder.raw_decode(raw, pos)
        marks.append(mark)
#sentence = "Hæ hæ hæ hæ! Ég heiti Gervimaður Finnland, en þú?"
#voice = "Alfur"
#wf, af = tiro(sentence,voice)
#print(wf, af)