Spaces:

arhanv
/

drum-kit-generator

Running on CPU Upgrade

App Files Community

arhanv committed 2 days ago

Commit

00fa560

1 Parent(s): d6e23f8

added timer and changed reverb params

Browse files

Files changed (2) hide show

app.py +20 -11
fx.py +10 -2

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import numpy as np
 from inference import generate_drum_kit
 from audio_utils import play_audio
 from fx import get_fx
 # Streamlit UI
 st.title("semantic spaces: kit generator")
@@ -13,23 +14,38 @@ st.write("hint: turn audio effects on! try weird prompts!")
 with st.container(border=True):
     # User Inputs
-    prompt = st.text_input("Describe your drum kit:", "warm vintage organic percussion")
     kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
     use_fx = st.toggle("Apply audio effects?", value=True)
     if use_fx:
         if st.toggle("Use a different prompt for audio effects?", value=True):
-            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal space")
         else:
             fx_prompt = prompt
     # Run the inference
     if st.button("Generate Drum Kit"):
         drum_kit = generate_drum_kit(prompt, kit_size)
         st.session_state["dry_kit"] = drum_kit
         if use_fx:
-            drum_kit, fx_params = get_fx(drum_kit, fx_prompt)
             st.session_state["fx_params"] = fx_params
         st.session_state["drum_kit"] = drum_kit  # Store results
 # Display results
 if "drum_kit" in st.session_state:
@@ -43,11 +59,4 @@ if "drum_kit" in st.session_state:
         for i, sound_file in enumerate(sounds):
             with cols[i]:
                 if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
-                    play_audio(sound_file)
-    if st.toggle("Show parameters?"):
-        if "fx_params" in st.session_state:
-            st.subheader("FX Parameters")
-            st.write(st.session_state["fx_params"])
-        if "dry_kit" in st.session_state:
-            st.write(st.session_state["dry_kit"])

 from inference import generate_drum_kit
 from audio_utils import play_audio
 from fx import get_fx
+import time
 # Streamlit UI
 st.title("semantic spaces: kit generator")
 with st.container(border=True):
     # User Inputs
+    prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
     kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
     use_fx = st.toggle("Apply audio effects?", value=True)
     if use_fx:
         if st.toggle("Use a different prompt for audio effects?", value=True):
+            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")
         else:
             fx_prompt = prompt
     # Run the inference
     if st.button("Generate Drum Kit"):
+        start_drum_time = time.time()
         drum_kit = generate_drum_kit(prompt, kit_size)
+        drum_time = time.time() - start_drum_time
         st.session_state["dry_kit"] = drum_kit
+        st.write(f"Drum kit generated in {drum_time} seconds.")
         if use_fx:
+            start_fx_time = time.time()
+            drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
+            fx_time = time.time() - start_fx_time
+            st.write(f"Effects generated and applied in {fx_time} seconds.")
+            st.write(f"Pre-effects loss: {pre_fx_fitness}")
+            st.write(f"Post-effects loss: {post_fx_fitness}")
             st.session_state["fx_params"] = fx_params
         st.session_state["drum_kit"] = drum_kit  # Store results
+    if "drum_kit" in st.session_state:
+        with st.expander("Click to view samples and parameters"):
+            if "fx_params" in st.session_state:
+                st.write(st.session_state["fx_params"])
+            if "dry_kit" in st.session_state:
+                st.write(st.session_state["dry_kit"])
 # Display results
 if "drum_kit" in st.session_state:
         for i, sound_file in enumerate(sounds):
             with cols[i]:
                 if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
+                    play_audio(sound_file)

fx.py CHANGED Viewed

@@ -8,6 +8,8 @@ import librosa
 import numpy as np
 import os
 def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
     """Stitch together all drum sounds into one audio file."""
     all_audio = []
@@ -89,7 +91,7 @@ search_space = [
     Real(4000, 20000, name="lowpass"),
     Real(50, 1000, name="highpass"),
     Real(0.0, 0.8, name="reverb_size"),
-    Real(0.0, 1.0, name="reverb_wet"),
     Real(0.0, 10.0, name="drive_db"),
     Real(4.0, 32.0, name="bit_depth")
 ]
@@ -106,10 +108,16 @@ def get_fx(drum_kit, fx_prompt):
     def obj_func(params):
         return objective_function(params, concat_file, text_embedding)
     # Run Bayesian optimization
     res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
     best_params = res.x
     # Apply the best FX parameters to each individual sound
     optimized_kit = {}
     for instrument, samples in drum_kit.items():
@@ -122,4 +130,4 @@ def get_fx(drum_kit, fx_prompt):
             "bit_depth": best_params[5]
         }, write_wav=True) for sample in samples]
-    return optimized_kit, get_params_dict(best_params)

 import numpy as np
 import os
+concat_file_path = "temp_concat.wav"
 def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
     """Stitch together all drum sounds into one audio file."""
     all_audio = []
     Real(4000, 20000, name="lowpass"),
     Real(50, 1000, name="highpass"),
     Real(0.0, 0.8, name="reverb_size"),
+    Real(0.2, 1.0, name="reverb_wet"),
     Real(0.0, 10.0, name="drive_db"),
     Real(4.0, 32.0, name="bit_depth")
 ]
     def obj_func(params):
         return objective_function(params, concat_file, text_embedding)
+    # Get CLAP similarity without FX (for evaluation purposes)
+    pre_fx_fitness = - evaluate_fitness(concat_file_path, text_embedding)
     # Run Bayesian optimization
     res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
     best_params = res.x
+    # Get post-FX fitness (for evaluation purposes)
+    post_fx_fitness = obj_func(best_params)
     # Apply the best FX parameters to each individual sound
     optimized_kit = {}
     for instrument, samples in drum_kit.items():
             "bit_depth": best_params[5]
         }, write_wav=True) for sample in samples]
+    return optimized_kit, get_params_dict(best_params), pre_fx_fitness, post_fx_fitness