Audio-Text-to-Text
Safetensors
English
Chinese
tinyllava
custom_code

A music score generator (in ABC notation) for instrumental pieces, fine-tuned from the MuFun model proposed in "Advancing the Foundation Model for Music Understanding".

Training code: https://github.com/laitselec/MuFun

Usage

Some audio processing packages, such as mutagen and torchaudio, need to be installed.

(The generated ABC code can be pasted into software such as EasyABC to listen to it or to transform it into other formats.)

# Example 1: transcribe an audio file into an ABC-notation score.
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id of the model, and the device it is moved to.
hf_path = 'Yi3852/MuFun-ABC'
device = 'cuda'

# The tokenizer is loaded with use_fast=False (the non-fast implementation).
tokenizer = AutoTokenizer.from_pretrained(hf_path, use_fast=False)

# trust_remote_code=True allows transformers to execute the custom model code
# shipped in the repository; weights are loaded in bfloat16.
model = AutoModelForCausalLM.from_pretrained(
    hf_path,
    trust_remote_code=True,
    torch_dtype="bfloat16",
)
model.to(device)

# Path of the audio file to transcribe; the prompt carries an <audio> tag.
aud = "/path/to/your/song.wav"
inp = '\n<audio>Could you give me the ABC code for this music piece?'

# NOTE(review): `chat` is presumably provided by the repo's custom code, not by
# transformers itself -- confirm against the model repository.
res = model.chat(
    prompt=inp,
    audio_files=aud,
    segs=None,
    tokenizer=tokenizer,
)
print(res)
# X:1
# M:3/4
# L:1/16
# K:Bm
# [D2D,2]F,2 A,2D2 [A,,2A,,2]E,2 | A,2C2 [B,,2B,,2]D,2 F,2B,2 | [F,,2F,,2]C,2 F,2A,2 [G,,2G,,2]B,,2 | D,2G,2 [D,,2D,,2]F,2 A,2D2 | [G,,2G,,2]E,2 B,2D2 [A,,2A,,2]E,2 |
# A,2C2 [f2D,2D,2]F,2 A,2D2 | [e2A,,2A,,2]E,2 A,2C2 [d2B,,2B,,2]D,2 | F,2B,2 [c2F,,2F,,2]C,2 F,2A,2 | [B2G,,2G,,2]B,,2 D,2G,2 [A2D,,2D,,2]F,2 |
# A,2D2 [B2G,,2G,,2]E,2 B,2D2 | [c2A,,2A,,2]E,2 A,2C2 [d2D,2D,2]F,2 | A,2D2 [c2E2A,,2A,,2]E,2 A,2C2 | [B2D2B,,2B,,2]D,2 F,2B,2 [A2C2F,,2F,,2]C,2 |
# F,2A,2 [G2B,2G,,2G,,2]B,,2 D,2G,2 | [F2A,2D,,2D,,2]F,2 A,2D2 [G2B,2G,,2G,,2]E,2 | B,2D2 [E2C2A,,2A,,2]E,2 A,2C2 | [D2D2D,2D,2]F,2 [D2F2A,2]F,2 [A2E2C2A,,2A,,2]C,2 |
# [G2E,2]A,2 [F2B,2B,,2B,,2]D,2 [D2B,2F,2]D,2 | [F2A,2F,,2F,,2]A,,2 [E2C,2]F,2 [D2D2G,,2G,,2]B,,2 | [B,2D2B,2D,2]G,2 [D2A,2D,,2D,,2]A,,2 [A2F2D,2]F,2 | [G2D2G,,2G,,2]E,2 [B2D2G,2]E,2 [A2E2C2A,,2A,,2]C,2 |
# [G2E,2]A,2 [d2F2D2D,2D,2]c2 [d2F,2]D2 | [C2A,2A,,2A,,2]A2 [E2G,2]F2 [D2F,2B,,2B,,2]d2 | [c2D,2]B2 [A2F2F,,2F,,2]F2 [A2E,2]B2 | [G2D2G,,2G,,2]F2 [E2B,2]G2 [F2A,2D,,2D,,2]E2 |
# [D2F,2]C2 [B,2B,2G,,2G,,2]A2 [G2D,2]F2 | [E2C2A,,2A,,2][G2E,2] [F2A,2]E2 [D2D2D,2D,2]E,2 | [d2F,2]G,2 [c2A,2A,,2A,,2]E,2 [E2A,2]G,2 | [D2F,2B,,2B,,2]B,2 [F2A,2]G,2 [A2F2F,,2F,,2]C,2 |
# [A,2F,2]E,2 [B,2D2G,,2G,,2]B,,2 [G2B,,2]C,2 | [F2D,,2D,,2]C,2 [d2B,,2][c2A,,2] [B2D2G,,2G,,2][A2F,2] | [G2E,2][F2D,2] [c2E2A,,2A,,2]G,2 F,2E,2 | [d2F2D2D,2D,2][fA,]g [a2D2]fg [aA,,A,,]A[BE,]c |
# [dA,]efg [f2B,,2B,,2][dF,]e [f2B,2]FG | [AF,,F,,]B[AC,]G [AF,]FGA [G2G,,2G,,2][BD,]A | [G2B,2]FE [FD,,D,,]E[DA,,]E [FD,]GAB | [G2G,,2G,,2][BD,]A [B2G,2]cd [AA,,A,,]B[cE,]d |
# [eC]fga [f2A,2D,2D,2][dF,][eG,] [f2A,2][eG,][dF,] | [eA,A,,A,,]c[dB,,][eC,] [fD,][eE,][dF,]c [d2F,2B,,2B,,2][BD,][cE,] | [d2F,2][DF,][EG,] [FA,D,D,][GB,][FA,][EG,] [FA,]dcd | [B2G,,2][dB,][cA,] [B2G,2][AF,][GE,] [AF,D,D,][GE,][FD,][GE,] |
# [AF,][BG,][cA,][dB,] [B2G,,2][dB,][cA,] [d2B,2][cA,][BG,] | [cA,A,,A,,][dB,,][eC,][dD,] [cE,][dF,][BG,][cA,] [d2F,2F,2][DA,]E | [F2C2][D2A,2] [C2A,,2A,,2][cG,]d [e2E2][c2G,2] | [B2B,,2B,,2][B,D,]C [D2F,2][B,2D,2] [C2F,,2F,,2][AC,]G |
# [F2A,2][E2C,2] [D2G,,2G,,2][GD,]F [E2B,2][G2D,2] | [F2D,,2D,,2][DA,,]E [F2D,2][A2F,2] [G2G,,2G,,2][BD,]A | [G2B,2][F2D,2] [E2A,,2A,,2][AE,]G [F2C2][E2E,2] | [D2D,,2D,,2][dFA,D,][cEE,] [d2F2A,2F,2][F2A,2D,2] [A2E2C2A,,2A,,2][ACA,A,][BD] |
# [c2E2A,2][A2C2A,2A,2] [d2F2B,2B,,2B,,2][dFB,F,][eC] [f2D2F,2F,2][d2F2B,2F,2] | [f2A2F2F,,2F,,2][fFA,A,][eGG,] [d2A2F,2F,2][c2A2A,2E,2] [B2D2G,,2G,,2][BDBG,][ADF,] | [B2G2E,2E,2][c2E2G,2G,2] [d2A2F,2D,,2D,,2][fAFD,D,][eGE,] [d2A2F,2F,2][f2A2A,2A,2] | [g2B2D,2G,,2G,,2][dB,B,D,][cA,A,] [B2G,2G,2D,2][B2F,2F,2] [c2A2E,2E,2A,,2][cEA,CA,][G,G,] |
# [A2F2F,2C2][G2C2E,2E,2] [F2D2D2D,2D,2]F,2 A,2[f2d2D2] | [f2c2E2A,,2A,,2][g2E,2] [f2A,2][e2C2] [d2B2B,,2B,,2]D,2 | F,2[d2B,2] [d2A2F2F,,2F,,2][e2D,2] [d2F,2][c2A,2] | [B2D2G,,2G,,2]D,2 G,2E2 [d2F2F,,2F,,2]A,,2 |
# D,2F,2 [d2G2D2G,,2G,,2][=c2D,2] [B2D2G,2]^c2 | [A2D2G2A,,2A,,2]E,2 [C2G,2][A2A,2] [F2D2D2D,2D,2][d2F,2] | A,2[f2a2D2] [f2a2c2E2A,,2A,,2][g2b2E,2] [f2a2A,2][e2g2] | [d2f2B,,2B,,2][B2F,2] A,2[d2f2D2] [d2f2A2F2F,,2F,,2][e2g2E2A,,2] |
# [d2f2D,2][c2e2] [B2d2D2G,,2G,,2][=c2D,2] [B2D2G,2][E2^c2] | [A2F2F2D,,2D,,2]A,,2 D,2[A2F,2] [B2D2D2G,,2G,,2]=C2 | [G4d4B,4B,4] [A2c2E2A,,2A,,2][E2E,2] [A2C2][G2c2] | [dFD,,D,,][A,,A,,][D,D,][F,F,] [A,A,][D,D,][dFF][A,A,] [dEAA,,A,,][A,A,][dGGb][A,A,] |
# [cFFa][A,A,][cEEg][A,A,] [dfB,,B,,][D,D,][F,F,][B,B,] [DD][F,F,][fDD][F,F,] | [fBFF,,F,,][F,F,][gEED][F,F,] [fDDAF][F,F,][eCC][F,F,] [BdG,,G,,][D,D,][=cCC][D,D,] | [B,B,B,][D,D,][cCCG][D,D,] [BD,,D,,][A,,A,,][GD,D,][F,F,] [FA,A,][F,F,][ED,D,][A,,A,,] | [BFGG,,G,,][B,,B,,][FD,D,][G,G,] [dDB,B,][G,G,][D,D,][B,,B,,] [dGA,,A,,][D,D,][EE,E,][G,G,] |
# [dA,A,][G,G,][CE,E,][A,,A,,] [cFD,,D,,][D,D,][F,F,] [dfA,A,][F,F,][D,D,] | [fcAEA,,A,,][C,C,][E,E,] [ecA,A,][E,E,][C,C,] [dFGBB,,B,,][D,D,][F,F,] | [fbd'B,B,][F,F,][D,D,] [fbd'F,,F,,][D,D,][F,F,] [=c'2A,2A,2][F,F,][D,D,] | [egbG,,G,,][B,,B,,][D,D,] [d'2b2G,2G,2][D,D,][B,,B,,] [fad'D,,D,,][A,,A,,][D,D,] |
# [aF,F,][D,D,][A,,A,,] [d2b2G,,2G,,2][B,,B,,][E,E,] [B2g2G,2G,2][E,E,][B,,B,,] | [c2a2A,,2A,,2][C,C,][E,E,] [e2c2A,2A,2][E,E,][C,C,] [dfD,,D,,][A,,A,,][D,D,][F,F,] | [D2F2A,2A,2][D,2D,2] [E2C2A,,,2A,,,2][A,,A,,][C,C,][E,E,] [c2e2A,2A,2A,2][G2E,2E,2] | [d2B2F2B,,,2B,,,2][B,,B,,][D,D,][F,F,] [D2F2A,2A,2][F,2F,2] [CA,F,,F,,][A,,A,,][C,C,][F,F,] |
# [c2A2A,2A,2A,2][E2C,2C,2] [B2G2D2G,,2G,,2][B,,B,,][D,D,][G,G,] [g2d2b2B,2B,2][D,2D,2] | [afD,,D,,][A,,A,,][D,D,][F,F,] [d2A2F2A,2A,2][D,2D,2] [d2G2E2G,,,2G,,,2][G,,G,,][B,,B,,][D,D,] | [BG,G,][D,D,][eB,,B,,][F,,F,,] [c2A2E2A,,,2A,,,2][A,,A,,][C,C,][E,E,] [ceA,A,A,][E,E,][C,C,][A,,A,,] | [f8d8A8F8A,8A,8A,,8A,,8] F8 |

# Example 2: text-only request -- no audio file is passed (aud is None), and
# the prompt asks for an ABC piece that follows a given chord sequence.
# Reuses the `model` and `tokenizer` objects created in the first example.
aud = None
inp = "Craft a musical work using ABC notation that that adhere to the set chord modifications. 'D', 'G', 'C', 'B', 'C', 'D', 'D', 'B'"

# Unlike the first example, this call sets temperature and max_new_tokens
# explicitly.
res = model.chat(
    prompt=inp,
    audio_files=aud,
    segs=None,
    tokenizer=tokenizer,
    temperature=1.0,
    max_new_tokens=4096,
)
print(res)
# X:1
# M:3/4
# L:1/16
# K:Bdor
# a4<G4 D2 |B2 D2B2 D2B2 D2 |B2 D2B2 D2 [b2b2b2] |g2 f2d2 a4<G4 D2 |B2 D2B2 D2B2 D2 |
# B2 g2D2 a2b2 g2 |f2 c'2f2 c'2f2 c'2 |f2 a2c'2 b2a2 c' |b2[a2b2]c' ^c'4 D2^f2 D2 |
# ^f2 D2^G2 D2d2 =g2 |D2 d2g2 a4<G4 D2 |B2 D2B2 D2B2 D2 |B2 D2B2 D2[A2B2] |
# B2G2 F2D2 a4<G4 D2 |B2 D2B2 D2B2 D2 |B2 D2B2 G2D2 [B2D2] |[G2E2] [F2D2][C2B,2] g2^d2 a2 |
# ^d2 g2d10 | [f2c'2][^d4a4][d2a2] g2 |^d2 a2d2 g2[d2d2] [f2c'2] |[^d4b4][d2b2] e2b2 e'2 |
# f2 b2c'2 b2c'2 e2 |b2 a2c'2 b2[a2b2] g2 |[^f2e2e2] c'2=f2 c'2f2 c'2 |f2 a2[c'2b2] f2c'2 e2 |
# b2 g2[_b2^d2] a2d2 f2 |[^d2g2] [f2f2]d2 f2[g2g2] [f2^g2] |b2 [a2f2][a2f2] [gc'][f2d2][c3c3] | D2B2 D2B2 D2B2 D2 |
# B2 D2B2 D2B4 |[a2d2] [g2b2][g2b2] c'2f2 c'2 |f2 a2c'2 g2[c'2a2] d2 |g2 a2[d2b2] [g2c'2][g2d'2] c'2 |
# [b2b2] [g2b2][f2d2] b2g2 f2 |d10 c'4 | f2c'2 b2c'2 b2g2 d2 |b2 c'2b2 c'2b2 f2 |
# e'2 c'2b2 a2b2 c'2 |b2 e'A2[g2b2] C2[B0B0][a2^d2]g2 C2 |d2D2d2 ^G2b2 D2 |D2 b2^d2 [e2e2c'2]=g2 g2 |
# [a2^d2] g2b2 =D2b2 D2 |b2 G2D2 D2[^d2g2] c'2 |[e4c'4][c'2g2g2] [b2c'2][a2d'2] [^d2e'2] |[f2c'2] [f2c'2][f2f2] [^d4b4] [d2g2] |
# [^g2a2] =g2[f2f'2] [B2e'2][d'2e2] d2 |b2 d'2b2 d'2b2 f2 |b2 b2d'2 ^d2b2 =d'2 |b2 d'2g'2 f2b2 d'2 |
# b2 d'2b2 d'2g'2 f2 |b2 g2[a2f2a2] C2B2 C2 |g2 D2B2 D2B2 D2 |B2 D2[B2F2] D2B2D2 |
# B2a2 g2[b2b2b2] g2f2 [c'2b2] |[f2f2] [c'2e2][f2g2] [c'2^f2][=f2a2] [c'2^g2] |[f2b2] [c'2d'2][f2e'2] [f2g'2][b2^f'2] [g2a'2] |[f2f'2] [b2e2][d2d2] c'2b2 |

Citation

@misc{jiang2025advancingfoundationmodelmusic,
      title={Advancing the Foundation Model for Music Understanding}, 
      author={Yi Jiang and Wei Wang and Xianwen Guo and Huiyun Liu and Hanrui Wang and Youri Xu and Haoqi Gu and Zhongqian Xie and Chuanjiang Luo},
      year={2025},
      eprint={2508.01178},
      archivePrefix={arXiv},
      primaryClass={cs.SD},
      url={https://arxiv.org/abs/2508.01178}, 
}

@misc {matthew_mitton_2025,
    author       = { {Matthew Mitton} },
    title        = { bread-midi-dataset (Revision 95c2155) },
    year         = 2025,
    url          = {\url{https://huggingface.co/datasets/breadlicker45/bread-midi-dataset}},
    doi          = { 10.57967/hf/4748 },
    publisher    = { Hugging Face }
}
Downloads last month
22
Safetensors
Model size
8.92B params
Tensor type
BF16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for Yi3852/MuFun-ABC

Base model

Qwen/Qwen3-8B-Base
Finetuned
Yi3852/MuFun-Base
Finetuned
(3)
this model

Datasets used to train Yi3852/MuFun-ABC

Collections including Yi3852/MuFun-ABC