import streamlit as st
import os
import numpy as np
from inference import generate_drum_kit
from audio_utils import play_audio
from fx import get_fx
import time
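# Note: generate_drum_kit, play_audio, and get_fx are this Space's own helpers,
# assumed to live in sibling files (inference.py, audio_utils.py, fx.py) next to
# app.py rather than in a published package.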
# Streamlit UI
st.title("semantic spaces: kit generator")
st.subheader("generate drum kits and audio effects with text prompts")
st.write("uses publicly available samples from [freesound](https://zenodo.org/records/4687854) and [CLAP embeddings](https://github.com/LAION-AI/CLAP) for text-based querying")
st.write("hint: turn audio effects on! try weird prompts!")
st.write("(works best on desktop, audio effects should take 20s-1min to generate)")
with st.container(border=True):
    # User Inputs
    prompt = st.text_input("Describe your drum kit:", "8-bit video game drums")
    kit_size = st.slider("Number of sounds per instrument:", 1, 10, 4)
    use_fx = st.toggle("Apply audio effects?", value=True)
    if use_fx:
        if st.toggle("Use a different prompt for audio effects?", value=True):
            fx_prompt = st.text_input("Describe your desired FX tone:", "soft and ethereal reverb")
        else:
            fx_prompt = prompt
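# The button below runs the retrieval step: generate_drum_kit(prompt, kit_size)
# is expected to match the prompt against the sample library via CLAP text
# embeddings and return a dict mapping each instrument name to a list of sample
# file paths (the display loop further down relies on that shape).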
# Run the inference
if st.button("Generate Drum Kit"):
    st.write("Finding the best samples...")
    start_drum_time = time.time()
    drum_kit = generate_drum_kit(prompt, kit_size)
    drum_time = time.time() - start_drum_time
    st.session_state["dry_kit"] = drum_kit
    st.write(f"Drum Kit generated in {drum_time} seconds.")
    if use_fx:
        st.write("Generating audio effects... (takes up to 1 min)")
        start_fx_time = time.time()
        drum_kit, fx_params, pre_fx_fitness, post_fx_fitness = get_fx(drum_kit, fx_prompt)
        fx_time = time.time() - start_fx_time
        st.write(f"Effects generated and applied in {fx_time} seconds.")
        st.write(f"Pre-effects loss: {pre_fx_fitness}")
        st.write(f"Post-effects loss: {post_fx_fitness}")
        st.session_state["fx_params"] = fx_params
    st.session_state["drum_kit"] = drum_kit  # Store results
if "drum_kit" in st.session_state:
with st.expander("Click to view samples and parameters"):
if "fx_params" in st.session_state:
st.write(st.session_state["fx_params"])
if "dry_kit" in st.session_state:
st.write(st.session_state["dry_kit"])
# Display results
if "drum_kit" in st.session_state:
    drum_kit = st.session_state["drum_kit"]
    st.subheader("Generated Drum Kit")
    for instrument, sounds in drum_kit.items():
        st.write(f"**{instrument}**")
        cols = st.columns(len(sounds))
        for i, sound_file in enumerate(sounds):
            with cols[i]:
                if st.button(f"▶️ {os.path.basename(sound_file)}", key=sound_file):
                    play_audio(sound_file)