catiR committed on
Commit
06af375
·
1 Parent(s): e5e7284
Files changed (2) hide show
  1. scripts/reaper2pass.py +73 -0
  2. scripts/runSQ.py +19 -0
scripts/reaper2pass.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import soundfile as sf
2
+ import numpy as np
3
+ from scipy import signal
4
+ from pydub import AudioSegment
5
+ import subprocess
6
+ import os
7
+
8
+ # Ref.: Hirst, "The analysis by synthesis of speech melody: from data to models"
9
+
10
+
11
+ # reaper requires wav file path input,
12
+ # not audio data itself.
13
+ # reaper does NOT require 16khz mono audio.
14
def reaper_soundfile(sound_path, orig_filetype):
    """Convert an audio file to a temporary WAV file and return its path.

    The file is decoded with pydub and exported as WAV into a
    ``REAPER_TMP`` directory under the current working directory
    (created if missing). The caller is responsible for deleting the
    returned file.

    Args:
        sound_path: Path to the source audio file.
        orig_filetype: Format of the source file as passed to pydub,
            e.g. ``'mp3'``. A leading dot (``'.mp3'``) is tolerated.

    Returns:
        Path of the exported file,
        ``<cwd>/REAPER_TMP/<source basename without extension>_tmp.wav``.
    """
    # pydub takes the bare format name, so drop a leading dot if present.
    audio_format = orig_filetype.lstrip('.')
    aud_data = AudioSegment.from_file(sound_path, audio_format)

    tmp_dir = os.path.join(os.getcwd(), 'REAPER_TMP')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(tmp_dir, exist_ok=True)

    # Strip only the final extension. str.replace(ext, '') would remove
    # every occurrence of the extension text and leave a trailing dot.
    fname = os.path.splitext(os.path.basename(sound_path))[0]
    tmp_path = os.path.join(tmp_dir, f'{fname}_tmp.wav')
    aud_data.export(tmp_path, format="wav")

    return tmp_path
27
+
28
+
29
+
30
def get_reaper(wav_path, maxf0='700', minf0='50', reaper_path = "REAPER/build/reaper"):
    """Run the REAPER binary on a WAV file and return its voiced f0 track.

    REAPER is asked to write its f0 output to ``/dev/stdout``, which is
    captured and parsed here, so nothing is written to disk.

    Args:
        wav_path: Path to the WAV file to analyse.
        maxf0: Value passed to REAPER's ``-x`` option (str or number).
        minf0: Value passed to REAPER's ``-m`` option (str or number).
        reaper_path: Path to the REAPER executable.

    Returns:
        A list of ``[time, f0]`` float pairs, one per output row whose
        second field is ``'1'`` (presumably the voicing flag).

    Raises:
        RuntimeError: If the output contains no ``EST_Header_End`` line,
            e.g. because REAPER failed to process the file.
    """
    cmd = [reaper_path, "-i", wav_path, '-f', '/dev/stdout',
           '-x', str(maxf0), '-m', str(minf0), '-a']
    result = subprocess.run(cmd, capture_output=True)
    output = result.stdout.decode()

    # Everything before the header terminator is metadata.
    _, marker, body = output.partition('EST_Header_End\n')
    if not marker:
        stderr_text = result.stderr.decode(errors='replace').strip()
        raise RuntimeError(
            f'No EST header in REAPER output for {wav_path} '
            f'(exit status {result.returncode}): {stderr_text}'
        )

    rows = (line.split(' ') for line in body.splitlines())
    # Rows without exactly three fields are trailing info in another format.
    return [
        [float(row[0]), float(row[2])]
        for row in rows
        if len(row) == 3 and row[1] == '1'
    ]
43
+
44
+
45
+
46
+ # 2 pass pitch estimation
47
def estimate_pitch(sound_path):
    """Estimate pitch with two REAPER passes, narrowing the f0 range.

    The first pass uses ``get_reaper``'s default f0 range. The first and
    third quartiles of its voiced f0 values then set the range for the
    second pass: floor ``0.75 * q1`` and ceiling ``1.5 * q3``.

    Non-WAV input is first converted to a temporary WAV file with
    ``reaper_soundfile``; that file is deleted before returning, even
    if pitch tracking fails.

    Args:
        sound_path: Path to the audio file to analyse.

    Returns:
        The second-pass list of ``[time, f0]`` pairs from ``get_reaper``,
        or an empty list if the first pass found no voiced frames.
    """
    # Extension without its dot. The original compared the dot-less
    # extension to '.wav', so the WAV branch could never be taken.
    extension = os.path.splitext(sound_path)[1].lstrip('.')

    tmp_path = None
    if extension.lower() == 'wav':
        wav_path = sound_path
    else:
        tmp_path = reaper_soundfile(sound_path, extension)
        wav_path = tmp_path

    print('REAPER FILE PATH:', wav_path)

    try:
        first_pass = get_reaper(wav_path)
        voiced_f0 = [f0 for _, f0 in first_pass]
        if not voiced_f0:
            # Quantiles are undefined with no voiced frames.
            return []

        q1 = np.quantile(voiced_f0, 0.25)
        q3 = np.quantile(voiced_f0, 0.75)
        pfloor = 0.75 * q1
        pceil = 1.5 * q3

        # Convert to plain ints so the arguments are strings like '123'
        # regardless of how numpy scalars round.
        second_pass = get_reaper(
            wav_path,
            maxf0=str(int(round(float(pceil)))),
            minf0=str(int(round(float(pfloor)))),
        )
    finally:
        # Only remove a file this function created.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)

    return second_pass
scripts/runSQ.py CHANGED
@@ -1,6 +1,7 @@
1
  import os, unicodedata
2
  from scripts.ctcalign import aligner, wav16m
3
  from scripts.tapi import tiro
 
4
 
5
  # given a Sentence string,
6
  # using a metadata file of SQ, // SQL1adult_metadata.tsv
@@ -34,6 +35,14 @@ def run(sentence, voices):
34
  f0_tts(sentence, voices, tts_dir, 'TODO path to reaper')
35
 
36
  # by now, all the data to cluster and eval exists in the right place.
 
 
 
 
 
 
 
 
37
 
38
  return temp_a_sample
39
 
@@ -115,7 +124,17 @@ def f0_human(meta, f0_dir, speech_dir, reaper_path):
115
  print(f'Need to estimate pitch for {len(no_f0)} recordings')
116
  if not os.path.exists(f0_dir):
117
  os.makedirs(f0_dir)
 
 
 
 
 
 
118
  #TODO
 
 
 
 
119
 
120
  else:
121
  print('All speech pitch trackings existed')
 
1
  import os, unicodedata
2
  from scripts.ctcalign import aligner, wav16m
3
  from scripts.tapi import tiro
4
+ from scripts.reaper2pass import estimate_pitch
5
 
6
  # given a Sentence string,
7
  # using a metadata file of SQ, // SQL1adult_metadata.tsv
 
35
  f0_tts(sentence, voices, tts_dir, 'TODO path to reaper')
36
 
37
  # by now, all the data to cluster and eval exists in the right place.
38
+ # (after the last todo of saving pitch to disk instead of only list)
39
+
40
+ # next, make a thing that does clustering.
41
+ # its input is Meta + the paths to find wav, aln, f0 datas.
42
+
43
+ # its output may as well actually be graphs lol
44
+
45
+ # also stop forgetting duration.
46
 
47
  return temp_a_sample
48
 
 
124
  print(f'Need to estimate pitch for {len(no_f0)} recordings')
125
  if not os.path.exists(f0_dir):
126
  os.makedirs(f0_dir)
127
+ for rec in no_f0:
128
+ wav_path = f'{speech_dir}{rec[2]}'
129
+ temp_data_f0 = estimate_pitch(wav_path)
130
+ print('2ND PASS PITCHES FOR', wav_path)
131
+ print(temp_data_f0)
132
+
133
  #TODO
134
+ # Current Todo:
135
+ # have pitch saved to file instead of returned to data
136
+ # tbqh, I can write my own simplified files instead of making reaper write
137
+ # whatever.
138
 
139
  else:
140
  print('All speech pitch trackings existed')