mattricesound committed on
Commit
5682d80
·
1 Parent(s): 5e4e307

Improve speed of selecting random chunks2

Browse files
Files changed (2) hide show
  1. remfx/datasets.py +0 -3
  2. remfx/utils.py +7 -6
remfx/datasets.py CHANGED
@@ -205,14 +205,11 @@ class EffectDataset(Dataset):
205
  if render_files:
206
  # Split audio file into chunks, resample, then apply random effects
207
  self.proc_root.mkdir(parents=True, exist_ok=True)
208
- bad_files = set()
209
  for num_chunk in tqdm(range(self.total_chunks)):
210
  chunk = None
211
  while chunk is None:
212
  random_dataset_choice = random.choice(self.files)
213
  random_file_choice = random.choice(random_dataset_choice)
214
- if random_file_choice in bad_files:
215
- continue
216
  chunk = select_random_chunk(
217
  random_file_choice, self.chunk_size, self.sample_rate
218
  )
 
205
  if render_files:
206
  # Split audio file into chunks, resample, then apply random effects
207
  self.proc_root.mkdir(parents=True, exist_ok=True)
 
208
  for num_chunk in tqdm(range(self.total_chunks)):
209
  chunk = None
210
  while chunk is None:
211
  random_dataset_choice = random.choice(self.files)
212
  random_file_choice = random.choice(random_dataset_choice)
 
 
213
  chunk = select_random_chunk(
214
  random_file_choice, self.chunk_size, self.sample_rate
215
  )
remfx/utils.py CHANGED
@@ -130,7 +130,7 @@ def create_sequential_chunks(
130
  audio_file: str, chunk_size: int, sample_rate: int
131
  ) -> List[torch.Tensor]:
132
  """Create sequential chunks of size chunk_size from an audio file.
133
- Return sample_index of start of each chunk and original sr
134
  """
135
  chunks = []
136
  audio, sr = torchaudio.load(audio_file)
@@ -150,13 +150,14 @@ def create_sequential_chunks(
150
  def select_random_chunk(
151
  audio_file: str, chunk_size: int, sample_rate: int
152
  ) -> List[torch.Tensor]:
153
- """Create sequential chunks of size chunk_size (samples) from an audio file.
154
- Return sample_index of start of each chunk and original sr
155
- """
156
  audio, sr = torchaudio.load(audio_file)
157
- max_len = audio.shape[-1] - int(chunk_size * (sample_rate / sr))
 
 
 
158
  random_start = torch.randint(0, max_len, (1,)).item()
159
- chunk = audio[:, random_start : random_start + chunk_size]
160
  resampled_chunk = torchaudio.functional.resample(chunk, sr, sample_rate)
161
  return resampled_chunk
162
 
 
130
  audio_file: str, chunk_size: int, sample_rate: int
131
  ) -> List[torch.Tensor]:
132
  """Create sequential chunks of size chunk_size from an audio file.
133
+ Return each chunk
134
  """
135
  chunks = []
136
  audio, sr = torchaudio.load(audio_file)
 
150
  def select_random_chunk(
151
  audio_file: str, chunk_size: int, sample_rate: int
152
  ) -> List[torch.Tensor]:
153
+ """Select random chunk of size chunk_size (samples) from an audio file."""
 
 
154
  audio, sr = torchaudio.load(audio_file)
155
+ new_chunk_size = int(chunk_size * (sr / sample_rate))
156
+ if new_chunk_size >= audio.shape[-1]:
157
+ return None
158
+ max_len = audio.shape[-1] - new_chunk_size
159
  random_start = torch.randint(0, max_len, (1,)).item()
160
+ chunk = audio[:, random_start : random_start + new_chunk_size]
161
  resampled_chunk = torchaudio.functional.resample(chunk, sr, sample_rate)
162
  return resampled_chunk
163