|
import os
import tempfile

import gradio as gr
import torch
import torchaudio
from gradio import processing_utils

from tsmnet import Stretcher
|
|
|
# Directory holding one ``<name>.pt`` checkpoint per selectable model.
model_root = './weights'

# Model names offered in the UI dropdown; the first entry is the default.
available_models = [
    'general',
    'pop-music',
    'classical-music',
    'speech',
]

# Sample rate that audio is resampled to before it is handed to a model.
working_sr = 22050
|
|
|
def prepare_models():
    """Build a ``Stretcher`` for every name in ``available_models``.

    Returns:
        dict: Maps each model name to a ``Stretcher`` constructed from the
        checkpoint path ``<model_root>/<name>.pt``.
    """
    loaded = {}
    for name in available_models:
        checkpoint = os.path.join(model_root, f'{name}.pt')
        loaded[name] = Stretcher(checkpoint)
    return loaded
|
|
|
def prepare_audio_file(rec, audio_file, yt_url):
    """Choose which of the three input sources should be processed.

    Precedence is microphone recording, then uploaded file, then YouTube URL.

    Args:
        rec: Value of the microphone input, or ``None`` when absent.
        audio_file: Value of the file input, or ``None`` when absent.
        yt_url: Text of the YouTube URL box; ``None`` or ``''`` when empty.

    Returns:
        ``rec`` if it is not ``None``, otherwise ``audio_file`` if it is not
        ``None``.

    Raises:
        gr.Error: If no source was supplied at all, or if only a YouTube URL
            was supplied (fetching audio from a URL is not implemented).
    """
    if rec is not None:
        return rec
    if audio_file is not None:
        return audio_file
    # The textbox can hold None as well as '' when it is empty, so both must
    # count as "nothing provided".
    if not yt_url:
        raise gr.Error('No audio found!')
    # Fail loudly instead of falling through and returning None, which would
    # only surface later as an unrelated error in the audio loader.
    raise gr.Error('YouTube input is not supported yet.')
|
|
|
|
|
def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
    """Time-stretch the selected audio source and return it for playback.

    Args:
        rec: Microphone input value, forwarded to ``prepare_audio_file``.
        audio_file: File input value, forwarded to ``prepare_audio_file``.
        yt_url: YouTube URL text, forwarded to ``prepare_audio_file``.
        speed: Playback-speed factor; must be greater than 0. A value of
            exactly 1 returns the source audio without running a model.
        model: Key into the module-level ``models`` mapping.
        start_time: Accepted for the UI's trim setting; not applied yet.
        end_time: Accepted for the UI's trim setting; not applied yet.

    Returns:
        The result of ``processing_utils.audio_from_file`` for either the
        untouched source (``speed == 1``) or the stretched audio.

    Raises:
        gr.Error: If no audio source is available or ``speed`` is not
            positive.
    """
    # TODO: start_time / end_time are received from the UI but trimming is
    # not implemented here yet.
    audio_file = prepare_audio_file(rec, audio_file, yt_url)
    if audio_file is None:
        # Defensive guard so a missing path is reported clearly instead of
        # failing inside the audio loader.
        raise gr.Error('No audio found!')
    if speed <= 0:
        # The slider's lower bound is 0, and a speed of 0 has no meaningful
        # time-stretched result.
        raise gr.Error('Playback speed must be greater than 0.')
    if speed == 1:
        return processing_utils.audio_from_file(audio_file)

    stretcher = models[model]

    x, sr = torchaudio.load(audio_file)
    x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
    sr = working_sr

    x = stretcher(x, speed).cpu()

    # Write to a scratch file rather than over the input: overwriting the
    # source would make a second submit stretch already-stretched audio, and
    # would tie the output format to whatever extension the input had.
    with tempfile.TemporaryDirectory() as scratch_dir:
        stretched_path = os.path.join(scratch_dir, 'stretched.wav')
        torchaudio.save(stretched_path, x, sr)
        # NOTE(review): audio_from_file is expected to decode the file
        # eagerly, so the scratch directory can be removed on exit - confirm.
        return processing_utils.audio_from_file(stretched_path)
|
|
|
|
|
|
|
|
|
# Load every checkpoint once at start-up; run() picks one by name.
models = prepare_models()

# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation had been flattened - confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown('# TSM-Net')
    gr.Markdown('---')

    with gr.Row():
        # Left column: input sources, settings and the submit button.
        with gr.Column():
            with gr.Tab('From microphone'):
                rec_box = gr.Audio(
                    label='Recording',
                    source='microphone',
                    type='filepath',
                )
            with gr.Tab('From file'):
                audio_file_box = gr.Audio(
                    label='Audio sample',
                    type='filepath',
                )
            with gr.Tab('From YouTube'):
                yt_url_box = gr.Textbox(
                    label='YouTube URL',
                    placeholder='Under Construction',
                    interactive=False,
                )

            # Touching one source blanks the other two.
            rec_box.change(
                lambda: [None] * 2,
                outputs=[audio_file_box, yt_url_box],
            )
            audio_file_box.change(
                lambda: [None] * 2,
                outputs=[rec_box, yt_url_box],
            )
            yt_url_box.input(
                lambda: [None] * 2,
                outputs=[rec_box, audio_file_box],
            )

            speed_box = gr.Slider(
                label='Playback speed',
                minimum=0,
                maximum=2,
                value=1,
            )
            with gr.Accordion('Fine-grained settings', open=False):
                with gr.Row():
                    gr.Textbox(
                        label='',
                        value='Trim audio sample',
                        interactive=False,
                    )
                    start_time_box = gr.Number(label='Start', value=0)
                    end_time_box = gr.Number(label='End', value=20)

                model_box = gr.Dropdown(
                    label='Model weight',
                    choices=available_models,
                    value=available_models[0],
                )

            submit_btn = gr.Button('Submit')

        # Right column: usage hints and the result player.
        with gr.Column():
            with gr.Accordion('Hint', open=False):
                gr.Markdown('You can find more settings under the **Fine-grained settings**')
                gr.Markdown('- Feeling slow? Try to adjust the start/end timestamp')
                gr.Markdown('- Low audio quality? Try to switch to a proper model weight')
            outputs = gr.Audio(label='Output')

    # Order must match the positional parameters of run().
    run_inputs = [
        rec_box,
        audio_file_box,
        yt_url_box,
        speed_box,
        model_box,
        start_time_box,
        end_time_box,
    ]
    submit_btn.click(fn=run, inputs=run_inputs, outputs=outputs)

    about_text = (
        'We proposed a novel approach in the field of time-scale modification '
        'on audio signals. While traditional methods use the framing technique, '
        'spectral approach uses the short-time Fourier transform to preserve '
        'the frequency during temporal stretching. TSM-Net, our neural-network '
        'model encodes the raw audio into a high-level latent representation. '
        'We call it Neuralgram, in which one vector represents 1024 audio samples. '
        'It is inspired by the framing technique but addresses the clipping '
        'artifacts. The Neuralgram is a two-dimensional matrix with real values, '
        'we can apply some existing image resizing techniques on the Neuralgram '
        'and decode it using our neural decoder to obtain the time-scaled audio. '
        'Both the encoder and decoder are trained with GANs, which shows fair '
        'generalization ability on the scaled Neuralgrams. Our method yields '
        'little artifacts and opens a new possibility in the research of modern '
        'time-scale modification. Please find more detail in our '
        '<a href="https://arxiv.org/abs/2210.17152" target="_blank">paper</a>.'
    )
    with gr.Accordion('Read more ...', open=False):
        gr.Markdown('---')
        gr.Markdown(about_text)

demo.queue(4)
demo.launch(server_name='0.0.0.0')
|
|
|
|