catiR committed 06af375 (parent: e5e7284): "app"

Files changed:
- scripts/reaper2pass.py  +73 -0
- scripts/runSQ.py        +19 -0
scripts/reaper2pass.py
ADDED
@@ -0,0 +1,73 @@
import soundfile as sf
import numpy as np
from scipy import signal
from pydub import AudioSegment
import subprocess
import os

# ref. Hirst, "The analysis by synthesis of speech melody: from data to models"


# reaper requires a wav file path as input,
# not the audio data itself.
# reaper does NOT require 16khz mono audio.
def reaper_soundfile(sound_path, orig_filetype):

    # convert non-wav input to a temporary wav file for reaper
    aud_data = AudioSegment.from_file(sound_path, orig_filetype)
    curdir = subprocess.run(["pwd"], capture_output=True, text=True)
    curdir = curdir.stdout.splitlines()[0]
    fname = sound_path.split('/')[-1].replace(orig_filetype, '')
    tmp_path = f'{curdir}/REAPER_TMP/{fname}_tmp.wav'
    if not os.path.exists(f'{curdir}/REAPER_TMP'):
        os.mkdir(f'{curdir}/REAPER_TMP')
    aud_data.export(tmp_path, format="wav")
    wav_path = tmp_path

    return wav_path


def get_reaper(wav_path, maxf0='700', minf0='50', reaper_path="REAPER/build/reaper"):

    # run reaper and read the pitch track from stdout
    f0_data = subprocess.run(
        [reaper_path, "-i", wav_path, '-f', '/dev/stdout', '-x', maxf0, '-m', minf0, '-a'],
        capture_output=True).stdout
    f0_data = f0_data.decode()
    f0_data = f0_data.split('EST_Header_End\n')[1].splitlines()
    f0_data = [l.split(' ') for l in f0_data]
    f0_data = [l for l in f0_data if len(l) == 3]  # the last line or two hold other info in a different format
    f0_data = [[float(t), float(f)] for t, v, f in f0_data if v == '1']  # keep voiced frames only

    return f0_data


# 2-pass pitch estimation
def estimate_pitch(sound_path):

    orig_ftype = sound_path.split('.')[-1]
    if orig_ftype == 'wav':
        wav_path = sound_path
    else:
        tmp_path = reaper_soundfile(sound_path, orig_ftype)
        wav_path = tmp_path

    print('REAPER FILE PATH:', wav_path)

    first_pass = get_reaper(wav_path)
    first_pass = [f for t, f in first_pass]

    # speaker-specific pitch floor/ceiling from the first-pass quartiles (after Hirst)
    q1 = np.quantile(first_pass, 0.25)
    q3 = np.quantile(first_pass, 0.75)

    pfloor = 0.75 * q1
    pceil = 1.5 * q3

    second_pass = get_reaper(wav_path, maxf0=str(round(pceil)), minf0=str(round(pfloor)))

    if orig_ftype != 'wav':
        subprocess.run(["rm", tmp_path])

    return second_pass
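For reference, a minimal usage sketch of the two-pass estimator (the audio path is hypothetical; this assumes the REAPER binary is built at REAPER/build/reaper and that pydub/ffmpeg are available for non-wav input):

    from scripts.reaper2pass import estimate_pitch

    # hypothetical recording; returns [[time_sec, f0_hz], ...] for voiced frames only,
    # re-estimated with the speaker-specific floor/ceiling found on the first pass
    pitch_track = estimate_pitch('REC/example_utterance.wav')
    print(len(pitch_track), pitch_track[:5])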
scripts/runSQ.py
CHANGED
@@ -1,6 +1,7 @@
 import os, unicodedata
 from scripts.ctcalign import aligner, wav16m
 from scripts.tapi import tiro
+from scripts.reaper2pass import estimate_pitch
 
 # given a Sentence string,
 # using a metadata file of SQ, // SQL1adult_metadata.tsv
@@ -34,6 +35,14 @@ def run(sentence, voices):
     f0_tts(sentence, voices, tts_dir, 'TODO path to reaper')
 
     # by now, all the data to cluster and eval exists in the right place.
+    # (after the last todo of saving pitch to disk instead of only list)
+
+    # next, make a thing that does clustering.
+    # its input is Meta + the paths to find wav, aln, f0 datas.
+
+    # its output may as well actually be graphs lol
+
+    # also stop forgetting duration.
 
     return temp_a_sample
 
@@ -115,7 +124,17 @@ def f0_human(meta, f0_dir, speech_dir, reaper_path):
         print(f'Need to estimate pitch for {len(no_f0)} recordings')
         if not os.path.exists(f0_dir):
             os.makedirs(f0_dir)
+        for rec in no_f0:
+            wav_path = f'{speech_dir}{rec[2]}'
+            temp_data_f0 = estimate_pitch(wav_path)
+            print('2ND PASS PITCHES FOR', wav_path)
+            print(temp_data_f0)
+
         #TODO
+        # Current Todo:
+        # have pitch saved to file instead of returned to data
+        # tbqh can write my own simplified files instead of make reaper write
+        # whatever.
 
     else:
         print('All speech pitch trackings existed')
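One possible shape for the "have pitch saved to file instead of returned to data" todo, as a rough sketch only (save_pitch/load_pitch and the tab-separated time/f0 format are assumptions, not part of this commit):

    def save_pitch(f0_data, out_path):
        # f0_data: [[time_sec, f0_hz], ...] as returned by estimate_pitch()
        with open(out_path, 'w') as handle:
            for t, f0 in f0_data:
                handle.write(f'{t}\t{f0}\n')

    def load_pitch(in_path):
        # read the same simple format back into [[time_sec, f0_hz], ...]
        with open(in_path) as handle:
            return [[float(t), float(f0)] for t, f0 in
                    (line.strip().split('\t') for line in handle if line.strip())]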