ajayarora1235
committed on
Commit
·
6736ecf
1
Parent(s):
afeb57e
get rid of cutoff time
Browse files
app.py
CHANGED
@@ -1530,6 +1530,11 @@ def run(seed, stop_repetition, sample_batch_size, left_margin, right_margin, cod
|
|
1530 |
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
1531 |
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
1532 |
os.environ["USER"] = "USER"
|
|
|
|
|
|
|
|
|
|
|
1533 |
# take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
|
1534 |
cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
|
1535 |
target_transcript = transcribed_text + target_transcript
|
@@ -1603,15 +1608,13 @@ def run_joint(input_audio_fn, seed, stop_repetition, sample_batch_size, left_mar
|
|
1603 |
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
1604 |
os.environ["USER"] = "USER"
|
1605 |
# take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
|
1606 |
-
|
1607 |
|
1608 |
target_transcript = transcribed_text + ' ' + target_transcript
|
1609 |
print(target_transcript)
|
1610 |
info = torchaudio.info(audio_fn)
|
1611 |
audio_dur = info.num_frames / info.sample_rate
|
1612 |
|
1613 |
-
cut_off_sec = audio_dur - 0.1
|
1614 |
-
|
1615 |
assert cut_off_sec < audio_dur, f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}"
|
1616 |
prompt_end_frame = int(cut_off_sec * info.sample_rate)
|
1617 |
|
|
|
1530 |
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
1531 |
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
1532 |
os.environ["USER"] = "USER"
|
1533 |
+
|
1534 |
+
print("Transcribing the input audio")
|
1535 |
+
transcribe_btn_click(input_audio_fn)
|
1536 |
+
print("Transcription complete")
|
1537 |
+
|
1538 |
# take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
|
1539 |
cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
|
1540 |
target_transcript = transcribed_text + target_transcript
|
|
|
1608 |
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
1609 |
os.environ["USER"] = "USER"
|
1610 |
# take a look at demo/temp/mfa_alignment, decide which part of the audio to use as prompt
|
1611 |
+
cut_off_sec = cutoff_value # NOTE: according to forced-alignment file, the word "common" stop as 3.01 sec, this should be different for different audio
|
1612 |
|
1613 |
target_transcript = transcribed_text + ' ' + target_transcript
|
1614 |
print(target_transcript)
|
1615 |
info = torchaudio.info(audio_fn)
|
1616 |
audio_dur = info.num_frames / info.sample_rate
|
1617 |
|
|
|
|
|
1618 |
assert cut_off_sec < audio_dur, f"cut_off_sec {cut_off_sec} is larger than the audio duration {audio_dur}"
|
1619 |
prompt_end_frame = int(cut_off_sec * info.sample_rate)
|
1620 |
|