ajayarora1235 committed on
Commit
6736ecf
·
1 Parent(s): afeb57e

get rid of cutoff time

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -1530,6 +1530,11 @@ def run(seed, stop_repetition, sample_batch_size, left_margin, right_margin, cod
1530
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
1531
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
1532
  os.environ["USER"] = "USER"
 
 
 
 
 
1533
  # take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
1534
  cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
1535
  target_transcript = transcribed_text + target_transcript
@@ -1603,15 +1608,13 @@ def run_joint(input_audio_fn, seed, stop_repetition, sample_batch_size, left_mar
1603
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
1604
  os.environ["USER"] = "USER"
1605
  # take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
1606
- # cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
1607
 
1608
  target_transcript = transcribed_text + ' ' + target_transcript
1609
  print(target_transcript)
1610
  info = torchaudio.info(audio_fn)
1611
  audio_dur = info.num_frames / info.sample_rate
1612
 
1613
- cut_off_sec = audio_dur - 0.1
1614
-
1615
  assert cut_off_sec < audio_dur, f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}"
1616
  prompt_end_frame = int(cut_off_sec * info.sample_rate)
1617
 
 
1530
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
1531
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
1532
  os.environ["USER"] = "USER"
1533
+
1534
+ print("Transcribing the input audio")
1535
+ transcribe_btn_click(input_audio_fn)
1536
+ print("Transcription complete")
1537
+
1538
  # take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
1539
  cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
1540
  target_transcript = transcribed_text + target_transcript
 
1608
  os.environ["CUDA_VISIBLE_DEVICES"] = "0"
1609
  os.environ["USER"] = "USER"
1610
  # take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
1611
+ cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
1612
 
1613
  target_transcript = transcribed_text + ' ' + target_transcript
1614
  print(target_transcript)
1615
  info = torchaudio.info(audio_fn)
1616
  audio_dur = info.num_frames / info.sample_rate
1617
 
 
 
1618
  assert cut_off_sec < audio_dur, f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}"
1619
  prompt_end_frame = int(cut_off_sec * info.sample_rate)
1620