Spaces:
Runtime error
Runtime error
import numpy as np | |
import tensorflow as tf | |
from scipy.io.wavfile import write | |
import keras.backend as K | |
import librosa.display | |
import cv2 | |
import librosa | |
import matplotlib.pyplot as plt | |
import librosa.display | |
import numpy as np | |
from keras.applications import VGG16 | |
import os | |
import scipy | |
import gradio as gr | |
# Recording settings.
# NOTE(review): DURATION is presumably the clip length in seconds — confirm.
DURATION = 10
WAVE_OUTPUT_FILE = "my_audio.wav"

# Load the tune recognition model from disk and keep a handle on the layer
# at index 2, which the rest of the script uses to compute song embeddings.
model = tf.keras.models.load_model('embdmodel_1.hdf5')
embedding_model = model.layers[2]
# Preprocess input audio: convert a song clip to a mel-spectrogram image,
# because the siamese network does not work on sound directly.
def create_spectrogram(clip, sample_rate, save_path):
    """Render *clip* as a mel-spectrogram picture and write it to *save_path*.

    Args:
        clip: Audio samples, passed to ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate of *clip*, passed as ``sr``.
        save_path: Destination of the image file written by ``Figure.savefig``.

    The figure is 0.72 in x 0.72 in, saved at 400 dpi with a tight bounding
    box and no padding.
    """
    plt.interactive(False)
    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # specshow draws on the current axes, i.e. on the figure created above.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
        fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails,
        # so repeated calls do not accumulate open matplotlib figures.
        fig.clf()
        plt.close(fig)
def load_img(path):
    """Read the image at *path* and return it as a 150x150 RGB array.

    Args:
        path: Location of the image file to read.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB and
        resized to 150x150.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from *path*.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image file: {path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, (150, 150))
import pickle

# Reference database used by main(): maps each song's file name to the
# collection of embeddings stored for that song.
# NOTE: pickle can execute arbitrary code while loading; 'dict.pickle' must
# only ever be a trusted file shipped with the application.
with open('dict.pickle', 'rb') as pickle_file:
    songspecdict = pickle.load(pickle_file)
def list_file_sizes(path="."):
    """Print the name and size (in MB) of every regular file in *path*.

    Sub-directories are skipped and not descended into. Files are listed in
    alphabetical order, one per line, formatted as ``<name> : <size>MB`` with
    the size rounded to three decimal places.

    Args:
        path: Directory to inspect; defaults to the current working directory.
    """
    # os.scandir yields the entry type and stat information together, and the
    # context manager releases the directory handle promptly.
    with os.scandir(path) as entries:
        sizes = sorted(
            (entry.name, entry.stat().st_size)
            for entry in entries
            if entry.is_file()
        )

    for name, size in sizes:
        print("{} : {}MB".format(name, round(size / (1024 * 1024), 3)))
# Directory (relative to the app root) that holds the reference recordings.
# NOTE(review): inferred from the repository URL the previous code built
# (".../tree/main/seismese_net_songs/<file>") — confirm the folder ships
# alongside this script.
_SONGS_DIR = 'seismese_net_songs'

_NO_LYRICS_MESSAGE = "Couldn't find lyrics for the recognized song."


def _closest_song(query_embedding, reference_embeddings):
    """Return the key of the song whose nearest embedding is closest to the query."""
    best_distance = {
        name: min(np.linalg.norm(query_embedding - emb) for emb in embeddings)
        for name, embeddings in reference_embeddings.items()
        if len(embeddings) > 0
    }
    if not best_distance:
        raise ValueError("The reference embedding database is empty.")
    # min() keeps the first key on ties, matching the previous list.index lookup.
    return min(best_distance, key=best_distance.get)


def _split_artist_title(filename):
    """Split an 'Artist-Title.ext' file name into (artist, title)."""
    # Split on the first hyphen only, so hyphens inside the title are kept.
    artist, separator, rest = filename.partition('-')
    if not separator:
        # No hyphen at all: treat the whole name as the title.
        artist, rest = '', filename
    return artist, os.path.splitext(rest)[0]


def _print_lyrics(artist, title):
    """Look the song up on Musixmatch and print its lyrics, if any are found."""
    from musixmatch import Musixmatch

    # NOTE(security): an API key is committed in source. Prefer supplying it
    # through the MUSIXMATCH_API_KEY environment variable and rotate the
    # embedded key; it is kept only as a backward-compatible fallback.
    api_key = os.environ.get(
        'MUSIXMATCH_API_KEY', '2b0d0615efa782e95598a0e99bda4a60'
    )
    client = Musixmatch(apikey=api_key)

    # Search for the recognized song.
    search = client.track_search(
        q_track=title,
        q_artist=artist,
        page_size=1,
        page=1,
        s_track_rating='desc',
    )
    if search['message']['header']['status_code'] != 200:
        print(_NO_LYRICS_MESSAGE)
        return

    track_list = search['message']['body']['track_list']
    if not track_list:
        # A successful search can still return no matching track.
        print(_NO_LYRICS_MESSAGE)
        return

    # Fetch the lyrics of the top search result.
    track_id = track_list[0]['track']['track_id']
    lyrics_result = client.track_lyrics_get(track_id=track_id)
    if lyrics_result['message']['header']['status_code'] != 200:
        print(_NO_LYRICS_MESSAGE)
        return

    lyrics = lyrics_result['message']['body']['lyrics']['lyrics_body']
    # Remove the annotation tag from the lyrics.
    lyrics = lyrics.replace(
        '******* This Lyrics is NOT for Commercial use *******', ''
    ).strip()
    print("Lyrics:\n", lyrics)


def main(audio):
    """Recognize the recorded song and return the matching reference recording.

    The first ``DURATION`` seconds of the recording are rendered as a
    mel-spectrogram image, embedded with ``embedding_model`` and compared
    (Euclidean distance) against every embedding in ``songspecdict``. The
    artist and title of the closest song are printed, together with its
    lyrics when Musixmatch has them.

    Args:
        audio: Path of the recorded audio file, which is what the interface's
            ``type="filepath"`` microphone input delivers. Raw ``bytes`` of an
            audio file are also accepted and are first written to
            ``WAVE_OUTPUT_FILE``.

    Returns:
        Path of the recognized song's audio file inside ``_SONGS_DIR``, which
        the ``"audio"`` output component can play.

    Raises:
        ValueError: If no audio was supplied or the reference database is empty.
        FileNotFoundError: If the recognized song's recording is missing.
    """
    if audio is None:
        raise ValueError("No audio was provided.")

    if isinstance(audio, (bytes, bytearray)):
        with open(WAVE_OUTPUT_FILE, "wb") as file:
            file.write(audio)
        audio_path = WAVE_OUTPUT_FILE
    else:
        # Already a path on disk; writing the path string into a binary file
        # (as was done before) raises TypeError.
        audio_path = audio
    list_file_sizes()

    # Load the song to match and keep only its first DURATION seconds.
    song, sr = librosa.load(audio_path)
    to_match = np.copy(song[:int(DURATION * sr)])
    print("Loaded data into librosa...")

    # Create the spectrogram image of the song to match.
    create_spectrogram(to_match, sr, 'test.png')
    print("Created spectogram...")

    # Load the spectrogram image back and add a leading batch axis.
    to_match_img = np.expand_dims(load_img('test.png'), axis=0)
    print("Loaded spectrum image...")

    # Get the embedding of the song to match.
    to_match_emb = embedding_model.predict(to_match_img)
    print("Get song embedding...")

    # Find the database song closest to the recording.
    recognized_song_name = _closest_song(to_match_emb, songspecdict)

    # Get the title and artist of the recognized song.
    recognized_song_artist, recognized_song_title = _split_artist_title(
        recognized_song_name
    )
    print(f'Artist: {recognized_song_artist}')
    print(f'Title: {recognized_song_title}')

    _print_lyrics(recognized_song_artist, recognized_song_title)

    # Hand the recognized song to the audio output as a local file path; a
    # repository web URL cannot be opened with open() or decoded by librosa.
    recognized_song_file = os.path.join(_SONGS_DIR, recognized_song_name)
    if not os.path.isfile(recognized_song_file):
        raise FileNotFoundError(
            f"Recording for the recognized song not found: {recognized_song_file!r}"
        )
    return recognized_song_file
# Custom stylesheet handed to the interface: hides the page footer and the
# markdown output area, restyles the primary button (normal and hover
# states) and recolors gallery items on hover.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
button.primary {
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
button.primary:hover{
z-index: 14;
left: 0px;
top: 0px;
cursor: pointer !important;
background: none rgb(37, 56, 133) !important;
border: none !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
button.gallery-item:hover {
border-color: rgb(37 56 133) !important;
background-color: rgb(229,225,255) !important;
}
"""

# NOTE(review): this Blocks object is created but never used or launched in
# the visible code — confirm whether anything else relies on the name `demo`.
demo = gr.Blocks()

# The microphone recording is passed to main() as a path to the audio file.
microphone_input = gr.inputs.Audio(source="microphone", type="filepath")

# Single-function interface: recorded audio in, recognized song audio out.
mf_transcribe = gr.Interface(
    fn=main,
    inputs=microphone_input,
    outputs="audio",
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
    css=css,
)

# Start the web app as soon as the script runs.
mf_transcribe.launch()