File size: 2,856 Bytes
82f19d8
b36d167
82f19d8
b36d167
 
 
 
 
82f19d8
b36d167
 
 
82f19d8
b36d167
 
 
 
 
82f19d8
b36d167
 
 
 
 
82f19d8
b36d167
82f19d8
 
 
b36d167
 
82f19d8
b36d167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5626d1e
 
b36d167
 
 
 
 
 
5626d1e
 
b36d167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr  # type: ignore
from helper import process_audio
import numpy as np  # type: ignore

# Paths of the sample clips that the UI offers as an example input pair
# (speech first, noise second) through gr.Examples.
# NOTE(review): the paths are relative, so they presumably have to resolve
# from the directory the app is started in -- confirm.
SAMPLE_SPEECH = "anushka.wav"
SAMPLE_NOISE = "traffic.wav"


def process_audio_files(speech_file, noise_file, alpha, beta):
    """
    Mix a speech clip with a noise clip and return the result for Gradio.

    Args:
        speech_file (tuple | None): Speech audio as (sample_rate, data),
            or None when no speech clip was supplied.
        noise_file (tuple | None): Noise audio as (sample_rate, data),
            or None when no noise clip was supplied.
        alpha (float): First slider value (-30 to +30), forwarded to
            ``process_audio`` unchanged.
        beta (float): Second slider value (-30 to +30), forwarded to
            ``process_audio`` unchanged.

    Returns:
        tuple: (sample_rate, processed_audio_data). The data is a 1-D array
        for single-channel output and a (frames, channels) array when the
        mixed segment reports more than one channel.

    Raises:
        gr.Error: If either input clip is missing, so the UI shows a
            readable message instead of an unpacking ``TypeError``.
    """
    # An empty gr.Audio input arrives as None; fail with a clear message
    # rather than crashing on the tuple unpacking below.
    if speech_file is None or noise_file is None:
        raise gr.Error(
            "Please provide both a speech clip and a noise clip."
        )

    speech_sr, speech_data = speech_file
    noise_sr, noise_data = noise_file

    # Process the audio using the helper function
    output_audio = process_audio(speech_data, noise_data, speech_sr, noise_sr,
                                 alpha, beta)

    # Convert AudioSegment to numpy array
    samples = np.array(output_audio.get_array_of_samples())

    # For multi-channel segments the samples come back interleaved
    # (L, R, L, R, ...). Reshape them to (frames, channels) so the output
    # player does not treat them as one long mono signal.
    # NOTE(review): assumes process_audio returns a pydub-style AudioSegment
    # exposing ``channels``; falls back to mono handling if it does not.
    channels = getattr(output_audio, "channels", 1)
    if channels > 1:
        samples = samples.reshape((-1, channels))

    return (output_audio.frame_rate, samples)


# Create the Gradio interface


with gr.Blocks() as app:
    gr.Markdown("# Audio Mixing Application")

    with gr.Row():
        # Left-hand column: both input clips, the example pair, the two
        # loudness sliders and the trigger button, created in display order.
        with gr.Column():
            speech_input = gr.Audio(type="numpy", label="Speech Audio")
            noise_input = gr.Audio(type="numpy", label="Noise Audio")

            # Example row that fills both audio inputs with the sample clips.
            gr.Examples(
                label="Sample Audio Files",
                inputs=[speech_input, noise_input],
                examples=[[SAMPLE_SPEECH, SAMPLE_NOISE]],
            )

            # Both sliders share the same integer range, centred on 0.
            alpha_slider = gr.Slider(
                label="Alpha (Speech Control)",
                info="Controls speech loudness: Left (-30) reduces volume, Right (+30) increases volume",  # noqa: E501
                minimum=-30, maximum=30, step=1, value=0,
            )
            beta_slider = gr.Slider(
                label="Beta (Noise Control)",
                info="Controls noise loudness: Left (-30) reduces volume, Right (+30) increases volume",  # noqa: E501
                minimum=-30, maximum=30, step=1, value=0,
            )

            submit_btn = gr.Button("Process Audio")

        # Right-hand column: player for the mixed result.
        with gr.Column():
            output_audio = gr.Audio(type="numpy", label="Mixed Audio")

    # Clicking the button runs the mixer on the current inputs and slider
    # values, and sends its return value to the output player.
    submit_btn.click(
        fn=process_audio_files,
        inputs=[speech_input, noise_input, alpha_slider, beta_slider],
        outputs=output_audio,
    )

# Launch the Gradio app only when this file is run as a script, so that
# importing the module builds `app` without starting it.
if __name__ == "__main__":
    app.launch()