Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
added timer and changed reverb params
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import numpy as np
|
|
4 |
from inference import generate_drum_kit
|
5 |
from audio_utils import play_audio
|
6 |
from fx import get_fx
|
|
|
7 |
|
8 |
# Streamlit UI
|
9 |
st.title("semantic spaces: kit generator")
|
@@ -13,23 +14,38 @@ st.write("hint: turn audio effects on! try weird prompts!")
|
|
13 |
|
14 |
with st.container(border=True):
|
15 |
# User Inputs
|
16 |
-
prompt = st.text_input("Describe your drum kit:", "
|
17 |
kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
|
18 |
use_fx = st.toggle("Apply audio effects?", value=True)
|
19 |
if use_fx:
|
20 |
if st.toggle("Use a different prompt for audio effects?", value=True):
|
21 |
-
fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal
|
22 |
else:
|
23 |
fx_prompt = prompt
|
24 |
|
25 |
# Run the inference
|
26 |
if st.button("Generate Drum Kit"):
|
|
|
27 |
drum_kit = generate_drum_kit(prompt, kit_size)
|
|
|
28 |
st.session_state["dry_kit"] = drum_kit
|
|
|
29 |
if use_fx:
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
st.session_state["fx_params"] = fx_params
|
32 |
st.session_state["drum_kit"] = drum_kit # Store results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# Display results
|
35 |
if "drum_kit" in st.session_state:
|
@@ -43,11 +59,4 @@ if "drum_kit" in st.session_state:
|
|
43 |
for i, sound_file in enumerate(sounds):
|
44 |
with cols[i]:
|
45 |
if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
|
46 |
-
play_audio(sound_file)
|
47 |
-
|
48 |
-
if st.toggle("Show parameters?"):
|
49 |
-
if "fx_params" in st.session_state:
|
50 |
-
st.subheader("FX Parameters")
|
51 |
-
st.write(st.session_state["fx_params"])
|
52 |
-
if "dry_kit" in st.session_state:
|
53 |
-
st.write(st.session_state["dry_kit"])
|
|
|
4 |
from inference import generate_drum_kit
|
5 |
from audio_utils import play_audio
|
6 |
from fx import get_fx
|
7 |
+
import time
|
8 |
|
9 |
# Streamlit UI
|
10 |
st.title("semantic spaces: kit generator")
|
|
|
14 |
|
15 |
with st.container(border=True):
|
16 |
# User Inputs
|
17 |
+
prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
|
18 |
kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
|
19 |
use_fx = st.toggle("Apply audio effects?", value=True)
|
20 |
if use_fx:
|
21 |
if st.toggle("Use a different prompt for audio effects?", value=True):
|
22 |
+
fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")
|
23 |
else:
|
24 |
fx_prompt = prompt
|
25 |
|
26 |
# Run the inference
|
27 |
if st.button("Generate Drum Kit"):
|
28 |
+
start_drum_time = time.time()
|
29 |
drum_kit = generate_drum_kit(prompt, kit_size)
|
30 |
+
drum_time = time.time() - start_drum_time
|
31 |
st.session_state["dry_kit"] = drum_kit
|
32 |
+
st.write(f"Drum kit generated in {drum_time} seconds.")
|
33 |
if use_fx:
|
34 |
+
start_fx_time = time.time()
|
35 |
+
drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
|
36 |
+
fx_time = time.time() - start_fx_time
|
37 |
+
st.write(f"Effects generated and applied in {fx_time} seconds.")
|
38 |
+
st.write(f"Pre-effects loss: {pre_fx_fitness}")
|
39 |
+
st.write(f"Post-effects loss: {post_fx_fitness}")
|
40 |
st.session_state["fx_params"] = fx_params
|
41 |
st.session_state["drum_kit"] = drum_kit # Store results
|
42 |
+
|
43 |
+
if "drum_kit" in st.session_state:
|
44 |
+
with st.expander("Click to view samples and parameters"):
|
45 |
+
if "fx_params" in st.session_state:
|
46 |
+
st.write(st.session_state["fx_params"])
|
47 |
+
if "dry_kit" in st.session_state:
|
48 |
+
st.write(st.session_state["dry_kit"])
|
49 |
|
50 |
# Display results
|
51 |
if "drum_kit" in st.session_state:
|
|
|
59 |
for i, sound_file in enumerate(sounds):
|
60 |
with cols[i]:
|
61 |
if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
|
62 |
+
play_audio(sound_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fx.py
CHANGED
@@ -8,6 +8,8 @@ import librosa
|
|
8 |
import numpy as np
|
9 |
import os
|
10 |
|
|
|
|
|
11 |
def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
|
12 |
"""Stitch together all drum sounds into one audio file."""
|
13 |
all_audio = []
|
@@ -89,7 +91,7 @@ search_space = [
|
|
89 |
Real(4000, 20000, name="lowpass"),
|
90 |
Real(50, 1000, name="highpass"),
|
91 |
Real(0.0, 0.8, name="reverb_size"),
|
92 |
-
Real(0.
|
93 |
Real(0.0, 10.0, name="drive_db"),
|
94 |
Real(4.0, 32.0, name="bit_depth")
|
95 |
]
|
@@ -106,10 +108,16 @@ def get_fx(drum_kit, fx_prompt):
|
|
106 |
def obj_func(params):
|
107 |
return objective_function(params, concat_file, text_embedding)
|
108 |
|
|
|
|
|
|
|
109 |
# Run Bayesian optimization
|
110 |
res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
|
111 |
best_params = res.x
|
112 |
|
|
|
|
|
|
|
113 |
# Apply the best FX parameters to each individual sound
|
114 |
optimized_kit = {}
|
115 |
for instrument, samples in drum_kit.items():
|
@@ -122,4 +130,4 @@ def get_fx(drum_kit, fx_prompt):
|
|
122 |
"bit_depth": best_params[5]
|
123 |
}, write_wav=True) for sample in samples]
|
124 |
|
125 |
-
return optimized_kit, get_params_dict(best_params)
|
|
|
8 |
import numpy as np
|
9 |
import os
|
10 |
|
11 |
+
concat_file_path = "temp_concat.wav"
|
12 |
+
|
13 |
def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
|
14 |
"""Stitch together all drum sounds into one audio file."""
|
15 |
all_audio = []
|
|
|
91 |
Real(4000, 20000, name="lowpass"),
|
92 |
Real(50, 1000, name="highpass"),
|
93 |
Real(0.0, 0.8, name="reverb_size"),
|
94 |
+
Real(0.2, 1.0, name="reverb_wet"),
|
95 |
Real(0.0, 10.0, name="drive_db"),
|
96 |
Real(4.0, 32.0, name="bit_depth")
|
97 |
]
|
|
|
108 |
def obj_func(params):
|
109 |
return objective_function(params, concat_file, text_embedding)
|
110 |
|
111 |
+
# Get CLAP similarity without FX (for evaluation purposes)
|
112 |
+
pre_fx_fitness = - evaluate_fitness(concat_file_path, text_embedding)
|
113 |
+
|
114 |
# Run Bayesian optimization
|
115 |
res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
|
116 |
best_params = res.x
|
117 |
|
118 |
+
# Get post-FX fitness (for evaluation purposes)
|
119 |
+
post_fx_fitness = obj_func(best_params)
|
120 |
+
|
121 |
# Apply the best FX parameters to each individual sound
|
122 |
optimized_kit = {}
|
123 |
for instrument, samples in drum_kit.items():
|
|
|
130 |
"bit_depth": best_params[5]
|
131 |
}, write_wav=True) for sample in samples]
|
132 |
|
133 |
+
return optimized_kit, get_params_dict(best_params), pre_fx_fitness, post_fx_fitness
|