import gradio as gr
import requests
import os
from dotenv import load_dotenv
from datetime import datetime
from pathlib import Path
from basic_pitch.inference import predict_and_save
from basic_pitch import ICASSP_2022_MODEL_PATH
from music21 import converter
import base64
# === 1. Environment Configuration ===
# Load settings via python-dotenv before reading them below.
load_dotenv()

# Endpoints of the external services. Either value is None when the variable
# is unset; the functions that use them check for that before calling out.
MUSICGEN_API_URL = os.environ.get("MUSICGEN_API_URL")
VEROVIO_API_URL = os.environ.get("VEROVIO_API_URL")

# Generated artifacts (MIDI, MusicXML, WAV) are written to ./output, relative
# to the current working directory; creating it is a no-op if it already exists.
os.makedirs("output", exist_ok=True)
# === 2. Utility Functions ===
def wav_to_musicxml(wav_path: str, timestamp: str = None) -> str:
    """
    Convert a WAV audio file to a MusicXML score using pitch detection.

    The audio is first transcribed to MIDI with basic-pitch, then the MIDI
    file is parsed with music21 and written out as MusicXML.

    Args:
        wav_path (str): Path to the input WAV audio file.
        timestamp (str, optional): Custom timestamp for output naming.
            Defaults to the current time formatted as ``%Y%m%d_%H%M%S``.

    Returns:
        str: File path to the generated MusicXML file
            (``output/generated_<timestamp>.musicxml``).

    Raises:
        FileNotFoundError: If the input audio file does not exist, or if the
            MIDI file could not be generated.
    """
    # Gradio hands over None when no audio was supplied; fail early with a
    # clear message instead of an obscure error from inside the model code.
    if not wav_path or not Path(wav_path).is_file():
        raise FileNotFoundError(f"❌ Input audio file not found: {wav_path}")

    timestamp = timestamp or datetime.now().strftime("%Y%m%d_%H%M%S")

    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    # One pattern is used both for cleaning up and for picking up the result,
    # so an unrelated .mid file lying in the output directory can never be
    # mistaken for the fresh transcription.
    midi_pattern = "*_basic_pitch.mid"

    # Remove leftovers from earlier runs (presumably because basic-pitch will
    # not overwrite an existing output file -- confirm against its docs).
    for stale_midi in output_dir.glob(midi_pattern):
        stale_midi.unlink()

    predict_and_save(
        audio_path_list=[wav_path],
        output_directory=str(output_dir),
        save_midi=True,
        sonify_midi=False,
        save_model_outputs=False,
        save_notes=False,
        model_or_model_path=ICASSP_2022_MODEL_PATH
    )

    midi_files = sorted(output_dir.glob(midi_pattern))
    if not midi_files:
        raise FileNotFoundError("❌ Failed to generate MIDI file")
    midi_path = midi_files[0]

    score = converter.parse(midi_path)
    musicxml_path = output_dir / f"generated_{timestamp}.musicxml"
    score.write("musicxml", fp=musicxml_path)
    return str(musicxml_path)
def render_musicxml_via_verovio_api(musicxml_path: str) -> str:
    """
    Render a MusicXML file to an SVG score preview using the Verovio API.

    The file is uploaded as multipart form field ``file`` to
    ``VEROVIO_API_URL``; the JSON response is expected to carry the SVG
    markup under the ``"svg"`` key.

    Args:
        musicxml_path (str): Path to the MusicXML file.

    Returns:
        str: HTML string containing the base64-encoded SVG score image, or an
            error message on failure (this function does not raise).
    """
    if not VEROVIO_API_URL:
        return "❌ VEROVIO_API_URL is not configured"

    try:
        with open(musicxml_path, "rb") as f:
            files = {'file': f}
            # (connect, read) timeout so a stalled service cannot hang the UI.
            response = requests.post(VEROVIO_API_URL, files=files, timeout=(10, 120))
    except Exception as e:
        return f"❌ Verovio API call failed: {e}"

    if response.status_code != 200:
        return f"❌ Verovio API error {response.status_code}: {response.text}"

    try:
        svg = response.json()["svg"]
        svg_b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    except Exception as e:
        return f"⚠️ Failed to parse SVG: {e}"

    # Embed the SVG as a data URI so the preview needs no extra file serving.
    # The white background keeps the notation legible on dark UI themes.
    return (
        '<div style="background: white; padding: 10px;">'
        f'<img src="data:image/svg+xml;base64,{svg_b64}" '
        'alt="Rendered score" style="max-width: 100%;" />'
        '</div>'
    )
def generate_music_from_hum(melody_file, prompt):
    """
    Generate music from a humming audio file and a style prompt using an external MusicGen API.

    The humming audio is uploaded as multipart form field ``melody`` together
    with the prompt as form field ``text``; the raw response body is saved as
    a WAV file under ``output/``.

    Args:
        melody_file (str): Path to the recorded humming audio (.wav).
        prompt (str): Text prompt describing desired music style.

    Returns:
        str: Path to the generated WAV music file, or error message on failure
            (this function does not raise).
    """
    if not MUSICGEN_API_URL:
        return "❌ MUSICGEN_API_URL is not configured."

    # Gradio passes None when nothing was recorded or uploaded; report that
    # plainly instead of surfacing a TypeError from open().
    if not melody_file:
        return "❌ Music generation failed: no humming audio was provided."

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    wav_out_path = f"output/generated_{timestamp}.wav"

    try:
        with open(melody_file, "rb") as f:
            files = {"melody": ("hum.wav", f, "audio/wav")}
            data = {"text": prompt}
            # (connect, read) timeout: generation can be slow, so the read
            # limit is generous, but a dead service must not block forever.
            response = requests.post(
                MUSICGEN_API_URL, files=files, data=data, timeout=(10, 600)
            )

        if response.status_code != 200:
            return f"❌ API error {response.status_code}: {response.text}"

        # The output directory may have been removed since start-up.
        Path("output").mkdir(exist_ok=True)
        with open(wav_out_path, "wb") as out:
            out.write(response.content)
        return wav_out_path
    except Exception as e:
        return f"❌ Music generation failed: {e}"
def generate_score_from_audio(wav_file):
    """
    Produce a MusicXML score for a WAV music file.

    Thin wrapper around ``wav_to_musicxml`` that stamps the output with the
    current time and converts any exception into an error string.

    Args:
        wav_file (str): Path to the WAV music file.

    Returns:
        str: File path to the generated MusicXML file, or error message on failure.
    """
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        musicxml_file = wav_to_musicxml(wav_file, stamp)
    except Exception as exc:
        return f"❌ Score generation failed: {exc}"
    return musicxml_file
# === 3. UI Construction ===
# Each tool is its own gr.Interface, defined at module level and rendered
# into a tab of the combined app below.
interface1 = gr.Interface(
    fn=wav_to_musicxml,
    inputs=[gr.Audio(label="WAV File", type="filepath")],
    outputs=gr.Textbox(label="Generated MusicXML File Path"),
)

interface2 = gr.Interface(
    fn=render_musicxml_via_verovio_api,
    inputs=gr.Textbox(label="MusicXML File Path"),
    outputs=gr.HTML(label="Rendered SVG Score"),
)

interface3 = gr.Interface(
    fn=generate_music_from_hum,
    inputs=[
        gr.Audio(label="Humming Audio (.wav)", type="filepath"),
        gr.Textbox(label="Prompt: describe music style"),
    ],
    outputs=gr.Audio(label="Generated Music (.wav)", type="filepath"),
)

interface4 = gr.Interface(
    fn=generate_score_from_audio,
    inputs=[gr.Audio(label="Generated Music (.wav)", type="filepath")],
    outputs=gr.Textbox(label="Generated MusicXML File Path"),
)

# (tab title, interface) pairs in the order the tabs are shown.
_TAB_LAYOUT = (
    ("1️⃣ WAV ➜ MusicXML", interface1),
    ("2️⃣ MusicXML ➜ Score Preview", interface2),
    ("3️⃣ Humming + Prompt ➜ Music", interface3),
    ("4️⃣ Generated Music ➜ Score", interface4),
)

with gr.Blocks(title="🎵 Vibe Jamming - Modular Tools") as demo:
    gr.Markdown("## 🎼 Vibe Jamming - Modular AI Music Tools")
    with gr.Tabs():
        for _tab_title, _tab_interface in _TAB_LAYOUT:
            with gr.TabItem(_tab_title):
                _tab_interface.render()

# === 4. Launch MCP Tool App ===
# Only start the server when run as a script, so importing this module
# (e.g. to reuse `demo`) has no launch side effect.
if __name__ == "__main__":
    demo.launch(mcp_server=True)