# Revision 692627b by arhanv: updated readme and loading screens
import streamlit as st
import os
import numpy as np
from inference import generate_drum_kit
from audio_utils import play_audio
from fx import get_fx
import time
# Streamlit UI
#
# Flow of one script run:
#   1. Collect the kit prompt, kit size and (optional) effects prompt.
#   2. On "Generate Drum Kit", build the kit, optionally run it through the
#      effects stage, and stash the results in st.session_state so they
#      survive the reruns triggered by later widget interaction.
#   3. Render one play button per sound of the stored kit.
st.title("semantic spaces: kit generator")
st.subheader("generate drum kits and audio effects with text prompts")
st.write("uses publicly available samples from [freesound](https://zenodo.org/records/4687854) and [CLAP embeddings](https://github.com/LAION-AI/CLAP) for text-based querying")
st.write("hint: turn audio effects on! try weird prompts!")
st.write("(works best on desktop, audio effects should take 20s-1min to generate)")

# NOTE(review): the generate button and the parameter expander are assumed to
# sit inside the bordered container together with the inputs -- confirm layout.
with st.container(border=True):
    # User Inputs
    prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
    kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
    use_fx = st.toggle("Apply audio effects?", value=True)

    # Default to the kit prompt so fx_prompt is bound on every code path.
    fx_prompt = prompt
    if use_fx:
        if st.toggle("Use a different prompt for audio effects?", value=True):
            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")

    # Run the inference
    if st.button("Generate Drum Kit"):
        st.write("Finding the best samples...")
        start_drum_time = time.time()
        drum_kit = generate_drum_kit(prompt, kit_size)
        drum_time = time.time() - start_drum_time
        # Keep the unprocessed kit around so it can be inspected below.
        st.session_state["dry_kit"] = drum_kit
        st.write(f"Drum Kit generated in {drum_time} seconds.")

        if use_fx:
            st.write("Generating audio effects... (takes up to 1 min)")
            start_fx_time = time.time()
            drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
            fx_time = time.time() - start_fx_time
            st.write(f"Effects generated and applied in {fx_time} seconds.")
            st.write(f"Pre-effects loss: {pre_fx_fitness}")
            st.write(f"Post-effects loss: {post_fx_fitness}")
            st.session_state["fx_params"] = fx_params
        else:
            # Drop parameters left over from an earlier run with effects on;
            # otherwise the expander would show settings that were never
            # applied to the kit generated just now.
            st.session_state.pop("fx_params", None)

        st.session_state["drum_kit"] = drum_kit  # Store results

    if "drum_kit" in st.session_state:
        with st.expander("Click to view samples and parameters"):
            if "fx_params" in st.session_state:
                st.write(st.session_state["fx_params"])
            if "dry_kit" in st.session_state:
                st.write(st.session_state["dry_kit"])

# Display results
if "drum_kit" in st.session_state:
    drum_kit = st.session_state["drum_kit"]
    st.subheader("Generated Drum Kit")
    for instrument, sounds in drum_kit.items():
        st.write(f"**{instrument}**")
        if not sounds:
            # st.columns() rejects a column count of zero.
            continue
        cols = st.columns(len(sounds))
        for i, sound_file in enumerate(sounds):
            with cols[i]:
                # Widget keys must be unique across the page; the path alone
                # is not, if the same file is picked more than once.
                button_key = f"{instrument}-{i}-{sound_file}"
                if st.button(f"▶️ {os.path.basename(sound_file)}", key=button_key):
                    play_audio(sound_file)