arhanv committed on
Commit
00fa560
·
1 Parent(s): d6e23f8

Added timers for drum-kit and FX generation and changed reverb parameters

Browse files
Files changed (2) hide show
  1. app.py +20 -11
  2. fx.py +10 -2
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
4
  from inference import generate_drum_kit
5
  from audio_utils import play_audio
6
  from fx import get_fx
 
7
 
8
  # Streamlit UI
9
  st.title("semantic spaces: kit generator")
@@ -13,23 +14,38 @@ st.write("hint: turn audio effects on! try weird prompts!")
13
 
14
  with st.container(border=True):
15
  # User Inputs
16
- prompt = st.text_input("Describe your drum kit:", "warm vintage organic percussion")
17
  kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
18
  use_fx = st.toggle("Apply audio effects?", value=True)
19
  if use_fx:
20
  if st.toggle("Use a different prompt for audio effects?", value=True):
21
- fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal space")
22
  else:
23
  fx_prompt = prompt
24
 
25
  # Run the inference
26
  if st.button("Generate Drum Kit"):
 
27
  drum_kit = generate_drum_kit(prompt, kit_size)
 
28
  st.session_state["dry_kit"] = drum_kit
 
29
  if use_fx:
30
- drum_kit, fx_params = get_fx(drum_kit, fx_prompt)
 
 
 
 
 
31
  st.session_state["fx_params"] = fx_params
32
  st.session_state["drum_kit"] = drum_kit # Store results
 
 
 
 
 
 
 
33
 
34
  # Display results
35
  if "drum_kit" in st.session_state:
@@ -43,11 +59,4 @@ if "drum_kit" in st.session_state:
43
  for i, sound_file in enumerate(sounds):
44
  with cols[i]:
45
  if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
46
- play_audio(sound_file)
47
-
48
- if st.toggle("Show parameters?"):
49
- if "fx_params" in st.session_state:
50
- st.subheader("FX Parameters")
51
- st.write(st.session_state["fx_params"])
52
- if "dry_kit" in st.session_state:
53
- st.write(st.session_state["dry_kit"])
 
4
  from inference import generate_drum_kit
5
  from audio_utils import play_audio
6
  from fx import get_fx
7
+ import time
8
 
9
  # Streamlit UI
10
  st.title("semantic spaces: kit generator")
 
14
 
15
  with st.container(border=True):
16
  # User Inputs
17
+ prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
18
  kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
19
  use_fx = st.toggle("Apply audio effects?", value=True)
20
  if use_fx:
21
  if st.toggle("Use a different prompt for audio effects?", value=True):
22
+ fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")
23
  else:
24
  fx_prompt = prompt
25
 
26
  # Run the inference
27
  if st.button("Generate Drum Kit"):
28
+ start_drum_time = time.time()
29
  drum_kit = generate_drum_kit(prompt, kit_size)
30
+ drum_time = time.time() - start_drum_time
31
  st.session_state["dry_kit"] = drum_kit
32
+ st.write(f"Drum kit generated in {drum_time} seconds.")
33
  if use_fx:
34
+ start_fx_time = time.time()
35
+ drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
36
+ fx_time = time.time() - start_fx_time
37
+ st.write(f"Effects generated and applied in {fx_time} seconds.")
38
+ st.write(f"Pre-effects loss: {pre_fx_fitness}")
39
+ st.write(f"Post-effects loss: {post_fx_fitness}")
40
  st.session_state["fx_params"] = fx_params
41
  st.session_state["drum_kit"] = drum_kit # Store results
42
+
43
+ if "drum_kit" in st.session_state:
44
+ with st.expander("Click to view samples and parameters"):
45
+ if "fx_params" in st.session_state:
46
+ st.write(st.session_state["fx_params"])
47
+ if "dry_kit" in st.session_state:
48
+ st.write(st.session_state["dry_kit"])
49
 
50
  # Display results
51
  if "drum_kit" in st.session_state:
 
59
  for i, sound_file in enumerate(sounds):
60
  with cols[i]:
61
  if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
62
+ play_audio(sound_file)
 
 
 
 
 
 
 
fx.py CHANGED
@@ -8,6 +8,8 @@ import librosa
8
  import numpy as np
9
  import os
10
 
 
 
11
  def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
12
  """Stitch together all drum sounds into one audio file."""
13
  all_audio = []
@@ -89,7 +91,7 @@ search_space = [
89
  Real(4000, 20000, name="lowpass"),
90
  Real(50, 1000, name="highpass"),
91
  Real(0.0, 0.8, name="reverb_size"),
92
- Real(0.0, 1.0, name="reverb_wet"),
93
  Real(0.0, 10.0, name="drive_db"),
94
  Real(4.0, 32.0, name="bit_depth")
95
  ]
@@ -106,10 +108,16 @@ def get_fx(drum_kit, fx_prompt):
106
  def obj_func(params):
107
  return objective_function(params, concat_file, text_embedding)
108
 
 
 
 
109
  # Run Bayesian optimization
110
  res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
111
  best_params = res.x
112
 
 
 
 
113
  # Apply the best FX parameters to each individual sound
114
  optimized_kit = {}
115
  for instrument, samples in drum_kit.items():
@@ -122,4 +130,4 @@ def get_fx(drum_kit, fx_prompt):
122
  "bit_depth": best_params[5]
123
  }, write_wav=True) for sample in samples]
124
 
125
- return optimized_kit, get_params_dict(best_params)
 
8
  import numpy as np
9
  import os
10
 
11
+ concat_file_path = "temp_concat.wav"
12
+
13
  def concatenate_sounds(drum_kit, output_path="temp_concat.wav"):
14
  """Stitch together all drum sounds into one audio file."""
15
  all_audio = []
 
91
  Real(4000, 20000, name="lowpass"),
92
  Real(50, 1000, name="highpass"),
93
  Real(0.0, 0.8, name="reverb_size"),
94
+ Real(0.2, 1.0, name="reverb_wet"),
95
  Real(0.0, 10.0, name="drive_db"),
96
  Real(4.0, 32.0, name="bit_depth")
97
  ]
 
108
  def obj_func(params):
109
  return objective_function(params, concat_file, text_embedding)
110
 
111
+ # Get CLAP similarity without FX (for evaluation purposes)
112
+ pre_fx_fitness = - evaluate_fitness(concat_file_path, text_embedding)
113
+
114
  # Run Bayesian optimization
115
  res = gp_minimize(obj_func, search_space, n_calls=30, random_state=42)
116
  best_params = res.x
117
 
118
+ # Get post-FX fitness (for evaluation purposes)
119
+ post_fx_fitness = obj_func(best_params)
120
+
121
  # Apply the best FX parameters to each individual sound
122
  optimized_kit = {}
123
  for instrument, samples in drum_kit.items():
 
130
  "bit_depth": best_params[5]
131
  }, write_wav=True) for sample in samples]
132
 
133
+ return optimized_kit, get_params_dict(best_params), pre_fx_fitness, post_fx_fitness