File size: 2,854 Bytes
1f5d38f
 
 
 
 
338e293
00fa560
1f5d38f
 
4c49639
0ddae63
 
 
692627b
1f5d38f
0ddae63
 
00fa560
0ddae63
 
338e293
0ddae63
00fa560
0ddae63
 
 
 
 
692627b
00fa560
0ddae63
00fa560
0ddae63
692627b
0ddae63
692627b
00fa560
 
 
 
 
 
0ddae63
 
00fa560
 
 
 
 
 
 
1f5d38f
 
 
 
 
 
 
 
 
 
 
 
 
00fa560
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import os
import numpy as np
from inference import generate_drum_kit
from audio_utils import play_audio
from fx import get_fx
import time

# Streamlit UI: page heading followed by the introductory notes.
st.title("semantic spaces: kit generator")
st.subheader("generate drum kits and audio effects with text prompts")
# Each note is rendered as its own st.write element, in this order.
for intro_note in (
    "uses publicly available samples from [freesound](https://zenodo.org/records/4687854) and [CLAP embeddings](https://github.com/LAION-AI/CLAP) for text-based querying",
    "hint: turn audio effects on! try weird prompts!",
    "(works best on desktop, audio effects should take 20s-1min to generate)",
):
    st.write(intro_note)

# Input form + generation step. Results are kept in st.session_state under
# the keys "dry_kit" (kit returned by generate_drum_kit), "fx_params"
# (effect parameters returned by get_fx, only present when the last run used
# effects) and "drum_kit" (the kit that the results section below plays back).
with st.container(border=True):
    # User inputs
    prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
    kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
    use_fx = st.toggle("Apply audio effects?", value=True)

    # Bind fx_prompt on every path so it can never be undefined further down;
    # it defaults to the kit prompt and is only overridden when the user asks
    # for a dedicated effects prompt.
    fx_prompt = prompt
    if use_fx:
        if st.toggle("Use a different prompt for audio effects?", value=True):
            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")

    # Run the inference
    if st.button("Generate Drum Kit"):
        st.write("Finding the best samples...")
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_drum_time = time.perf_counter()
        drum_kit = generate_drum_kit(prompt, kit_size)
        drum_time = time.perf_counter() - start_drum_time
        st.session_state["dry_kit"] = drum_kit
        st.write(f"Drum Kit generated in {drum_time} seconds.")
        if use_fx:
            st.write("Generating audio effects... (takes up to 1 min)")
            start_fx_time = time.perf_counter()
            # get_fx returns the processed kit, the chosen effect parameters
            # and the loss values before/after applying the effects.
            drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
            fx_time = time.perf_counter() - start_fx_time
            st.write(f"Effects generated and applied in {fx_time} seconds.")
            st.write(f"Pre-effects loss: {pre_fx_fitness}")
            st.write(f"Post-effects loss: {post_fx_fitness}")
            st.session_state["fx_params"] = fx_params
        elif "fx_params" in st.session_state:
            # This run applied no effects: drop the parameters left over from
            # an earlier FX run so the expander does not show stale values
            # next to the new kit.
            del st.session_state["fx_params"]
        st.session_state["drum_kit"] = drum_kit  # Store results

    # Details of the most recent generation (persisted across reruns).
    if "drum_kit" in st.session_state:
        with st.expander("Click to view samples and parameters"):
            if "fx_params" in st.session_state:
                st.write(st.session_state["fx_params"])
            if "dry_kit" in st.session_state:
                st.write(st.session_state["dry_kit"])

# Display results: one row of play buttons per instrument for the kit stored
# in session state by the generation step.
if "drum_kit" in st.session_state:
    drum_kit = st.session_state["drum_kit"]
    st.subheader("Generated Drum Kit")

    for instrument, sounds in drum_kit.items():
        st.write(f"**{instrument}**")

        # st.columns requires a positive column count, so an instrument with
        # no samples must be skipped instead of crashing the whole page.
        num_sounds = len(sounds)
        if num_sounds == 0:
            st.caption("No samples found for this instrument.")
            continue

        cols = st.columns(num_sounds)
        for i, sound_file in enumerate(sounds):
            with cols[i]:
                # Widget keys must be unique across the page. The path alone
                # collides when the same file shows up twice (within one
                # instrument or across instruments), so qualify it with the
                # instrument name and the position in the row.
                button_key = f"{instrument}-{i}-{sound_file}"
                if st.button(f"▶️ {os.path.basename(sound_file)}", key=button_key):
                    play_audio(sound_file)