File size: 6,040 Bytes
96ab4ac
7e16d1f
 
 
7e4b79e
 
 
 
 
54dde8b
 
a5b2713
7e16d1f
 
bec0341
7e16d1f
7e4b79e
 
a5b2713
ee10544
c500a91
 
 
 
 
 
 
 
 
 
ee10544
7e4b79e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10d5c54
7e4b79e
 
 
 
 
d68bfe0
77ae978
5b8b1e0
d68bfe0
c500a91
a5b2713
c500a91
 
 
 
 
bec0341
10d5c54
042e8a5
bec0341
 
 
 
 
10d5c54
042e8a5
bec0341
10d5c54
042e8a5
bec0341
 
 
c120688
 
 
 
 
bec0341
 
10d5c54
f388e55
5b8b1e0
ee10544
c500a91
 
 
 
 
 
 
 
7e16d1f
ee10544
96ab4ac
7e4b79e
 
 
f8a57e8
7e4b79e
 
 
f8a57e8
7e4b79e
ee10544
 
7e4b79e
ee10544
 
7e4b79e
ee10544
7e4b79e
ee10544
77ae978
5b8b1e0
ee10544
c500a91
 
 
 
 
 
 
ee10544
 
 
 
 
7f5ad60
a5b2713
7f5ad60
5bf8d21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee10544
 
3b389cf
ee10544
5bf8d21
ee10544
5bf8d21
ee10544
5bf8d21
ee10544
5bf8d21
 
a5b2713
042e8a5
a5e9149
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import gradio as gr
import requests
import os
from dotenv import load_dotenv
from datetime import datetime
from pathlib import Path
from basic_pitch.inference import predict_and_save
from basic_pitch import ICASSP_2022_MODEL_PATH
from music21 import converter
import base64

# === 1. Environment Configuration ===
# Run python-dotenv's loader first so the endpoint URLs below can be supplied
# through it; either URL stays None when its variable is not set.
load_dotenv()
MUSICGEN_API_URL = os.environ.get("MUSICGEN_API_URL")
VEROVIO_API_URL = os.environ.get("VEROVIO_API_URL")

# Every generated artifact (MIDI, MusicXML, WAV) is written under ./output.
os.makedirs("output", exist_ok=True)

# === 2. Utility Functions ===
def wav_to_musicxml(wav_path: str, timestamp: str = None) -> str:
    """
    Convert a WAV audio file to a MusicXML score using pitch detection.

    The audio is transcribed to MIDI with Basic Pitch, the MIDI is parsed with
    music21, and the resulting score is written to the ``output`` directory.

    Args:
        wav_path (str): Path to the input WAV audio file.
        timestamp (str, optional): Custom timestamp for output naming. Defaults to current time.
    Returns:
        str: File path to the generated MusicXML file.
    Raises:
        FileNotFoundError: If the input audio file does not exist, or if the
            MIDI file could not be generated.
    """
    # Fail fast, before any cleanup or model work, when no usable input was
    # supplied (e.g. the UI submitted without an uploaded file).
    if not wav_path or not Path(wav_path).is_file():
        raise FileNotFoundError(f"❌ Input audio file not found: {wav_path}")

    timestamp = timestamp or datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = Path("output")
    output_dir.mkdir(parents=True, exist_ok=True)

    # One pattern is used for both cleanup and lookup, so the file picked up
    # after transcription can only be one produced by this run.
    midi_pattern = "*_basic_pitch.mid"
    for stale_midi in output_dir.glob(midi_pattern):
        stale_midi.unlink()

    predict_and_save(
        audio_path_list=[wav_path],
        output_directory=str(output_dir),
        save_midi=True,
        sonify_midi=False,
        save_model_outputs=False,
        save_notes=False,
        model_or_model_path=ICASSP_2022_MODEL_PATH
    )

    midi_files = sorted(
        output_dir.glob(midi_pattern),
        key=lambda path: path.stat().st_mtime,
    )
    if not midi_files:
        raise FileNotFoundError("❌ Failed to generate MIDI file")

    # Basic Pitch is expected to name its output "<input stem>_basic_pitch.mid";
    # if that exact name is not present, fall back to the newest match.
    expected_midi = output_dir / f"{Path(wav_path).stem}_basic_pitch.mid"
    midi_path = expected_midi if expected_midi in midi_files else midi_files[-1]

    score = converter.parse(midi_path)
    musicxml_path = output_dir / f"generated_{timestamp}.musicxml"
    score.write("musicxml", fp=musicxml_path)
    return str(musicxml_path)


def render_musicxml_via_verovio_api(musicxml_path: str) -> str:
    """
    Render a MusicXML file to an SVG score preview using the Verovio API.

    The file is uploaded as multipart form field ``file`` to
    ``VEROVIO_API_URL``; the JSON reply's ``svg`` field is embedded in an
    ``<img>`` tag as a base64 data URI.

    Args:
        musicxml_path (str): Path to the MusicXML file.
    Returns:
        str: HTML string containing base64-encoded SVG score image, or error message on failure.
    """
    if not VEROVIO_API_URL:
        return "❌ VEROVIO_API_URL is not configured"
    if not musicxml_path:
        return "❌ No MusicXML file path provided"

    try:
        with open(musicxml_path, "rb") as f:
            files = {'file': f}
            # (connect, read) timeout in seconds: without it a stalled server
            # would block this request indefinitely.
            response = requests.post(VEROVIO_API_URL, files=files, timeout=(10, 120))
    except Exception as e:
        # UI boundary: report any file or network failure as text instead of
        # raising.
        return f"❌ Verovio API call failed: {e}"

    if response.status_code != 200:
        return f"❌ Verovio API error {response.status_code}: {response.text}"

    try:
        svg = response.json()["svg"]
        # Base64-encode so the SVG markup can live inside an attribute value.
        svg_b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
        html = f'''
        <div style="background-color: white; padding: 10px; border-radius: 8px;">
            <img src="data:image/svg+xml;base64,{svg_b64}" style="width:100%; max-height:600px;" />
        </div>
        '''
        return html
    except Exception as e:
        # Covers a non-JSON body, a missing "svg" key, or a non-string value.
        return f"⚠️ Failed to parse SVG: {e}"


def generate_music_from_hum(melody_file, prompt):
    """
    Generate music from a humming audio file and a style prompt using an external MusicGen API.

    The recording is uploaded as multipart field ``melody`` together with the
    form field ``text``; a 200 response body is saved verbatim as a WAV file
    under ``output/``.

    Args:
        melody_file (str): Path to the recorded humming audio (.wav).
        prompt (str): Text prompt describing desired music style.
    Returns:
        str: Path to the generated WAV music file, or error message on failure.
    """
    if not MUSICGEN_API_URL:
        return "❌ MUSICGEN_API_URL is not configured."
    if not melody_file:
        return "❌ No humming audio provided."

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    wav_out_path = f"output/generated_{timestamp}.wav"

    try:
        with open(melody_file, "rb") as f:
            files = {"melody": ("hum.wav", f, "audio/wav")}
            data = {"text": prompt}
            # (connect, read) timeout in seconds. The read limit is generous
            # because generation can be slow, but it still bounds a hung
            # connection instead of blocking forever.
            response = requests.post(
                MUSICGEN_API_URL, files=files, data=data, timeout=(10, 600)
            )

        if response.status_code != 200:
            return f"❌ API error {response.status_code}: {response.text}"

        with open(wav_out_path, "wb") as out:
            out.write(response.content)

        return wav_out_path
    except Exception as e:
        # UI boundary: report file, network and write failures as text.
        return f"❌ Music generation failed: {e}"


def generate_score_from_audio(wav_file):
    """
    Produce a MusicXML score for a WAV file, reporting failures as text.

    Thin wrapper around ``wav_to_musicxml`` that stamps the output name with
    the current time and converts any raised exception into a message string.

    Args:
        wav_file (str): Path to the WAV music file.
    Returns:
        str: File path to the generated MusicXML file, or error message on failure.
    """
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        musicxml_file = wav_to_musicxml(wav_file, stamp)
    except Exception as err:
        return f"❌ Score generation failed: {err}"
    else:
        return musicxml_file

# === 3. UI Construction ===

# Each tool is wrapped in its own gr.Interface; the Blocks layout further down
# renders them as separate tabs.

# Tool 1: WAV file -> MusicXML file path. Only the audio input is wired, so
# wav_to_musicxml's optional `timestamp` argument keeps its default.
interface1 = gr.Interface(
    fn=wav_to_musicxml,
    inputs=[gr.Audio(type="filepath", label="WAV File")],
    outputs=gr.Textbox(label="Generated MusicXML File Path")
)
# Tool 2: MusicXML file path (entered as text) -> HTML score preview.
interface2 = gr.Interface(
    fn=render_musicxml_via_verovio_api,
    inputs=gr.Textbox(label="MusicXML File Path"),
    outputs=gr.HTML(label="Rendered SVG Score")
)
# Tool 3: humming recording + style prompt -> generated audio file.
# NOTE(review): generate_music_from_hum returns an error *message* string on
# failure, which this Audio output receives in place of a file path — confirm
# that is the intended failure presentation.
interface3 = gr.Interface(
    fn=generate_music_from_hum,
    inputs=[
        gr.Audio(type="filepath", label="Humming Audio (.wav)"),
        gr.Textbox(label="Prompt: describe music style")
    ],
    outputs=gr.Audio(type="filepath", label="Generated Music (.wav)")
)
# Tool 4: audio file -> MusicXML file path, via the wrapper that turns
# exceptions into an error message string.
interface4 = gr.Interface(
    fn=generate_score_from_audio,
    inputs=[gr.Audio(type="filepath", label="Generated Music (.wav)")],
    outputs=gr.Textbox(label="Generated MusicXML File Path")
)

# (tab label, interface) pairs in the order the tabs are displayed.
_TAB_LAYOUT = (
    ("1️⃣ WAV ➜ MusicXML", interface1),
    ("2️⃣ MusicXML ➜ Score Preview", interface2),
    ("3️⃣ Humming + Prompt ➜ Music", interface3),
    ("4️⃣ Generated Music ➜ Score", interface4),
)

with gr.Blocks(title="🎵 Vibe Jamming - Modular Tools") as demo:
    gr.Markdown("## 🎼 Vibe Jamming - Modular AI Music Tools")
    with gr.Tabs():
        # Render each pre-built interface inside its own tab.
        for _tab_label, _tab_interface in _TAB_LAYOUT:
            with gr.TabItem(_tab_label):
                _tab_interface.render()

# === 4. Launch MCP Tool App ===
# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(mcp_server=True)