First commit
- .gitattributes +1 -0
- Weights/pytorch_lora_weights.safetensors +3 -0
- flask/10.mp3 +0 -0
- flask/Beat_tracking_service.py +20 -0
- flask/Emotion_spotting_service.py +65 -0
- flask/Genre_classifier_model.h5 +3 -0
- flask/Genre_spotting_service.py +73 -0
- flask/Image_generation_service.py +20 -0
- flask/__pycache__/Beat_tracking_service.cpython-310.pyc +0 -0
- flask/__pycache__/Beat_tracking_service.cpython-311.pyc +0 -0
- flask/__pycache__/Emotion_spotting_service.cpython-310.pyc +0 -0
- flask/__pycache__/Emotion_spotting_service.cpython-311.pyc +0 -0
- flask/__pycache__/Genre_spotting_service.cpython-310.pyc +0 -0
- flask/__pycache__/Genre_spotting_service.cpython-311.pyc +0 -0
- flask/__pycache__/Image_generation_service.cpython-310.pyc +0 -0
- flask/__pycache__/Image_generation_service.cpython-311.pyc +0 -0
- flask/emotion_model.h5 +3 -0
- main.py +77 -0
- requirements.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+flask/emotion_model.h5 filter=lfs diff=lfs merge=lfs -text
Weights/pytorch_lora_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1b19610541f9a2c6f235a1bac2690d04b98535f9f9f7790e9ad4d0fe8ac89b0
+size 3226184
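The three lines above are a Git LFS pointer: the repository stores only the spec version, content hash, and byte size, while the 3.2 MB safetensors blob itself lives in LFS storage. The .h5 model files added below use the same pointer format.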
flask/10.mp3
ADDED
Binary file (961 kB).
flask/Beat_tracking_service.py
ADDED
@@ -0,0 +1,20 @@
+import librosa
+
+class _Beat_tracking_service():
+    instance = None
+    def __init__(self):
+        self.instance = 1
+
+    def get_beat(self, file_path):
+        y, sr = librosa.load(file_path)
+        beat = librosa.beat.beat_track(y=y, sr=sr)
+        return beat[0][0]
+
+def Beat_tracking_service():
+    if _Beat_tracking_service.instance == None:
+        _Beat_tracking_service.instance = _Beat_tracking_service()
+    return _Beat_tracking_service.instance
+
+# beat_tracking_service = Beat_tracking_service()
+# predicted_beat = beat_tracking_service.get_beat("10.mp3")
+# print(predicted_beat)
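For context on get_beat: librosa.beat.beat_track returns a (tempo, beat_frames) pair, and beat[0][0] indexes into the tempo value, which newer librosa versions return as a one-element array. A minimal equivalent sketch, assuming the bundled 10.mp3:

import librosa

# Hypothetical local file path, for illustration only
y, sr = librosa.load("10.mp3")
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
print(float(tempo))  # estimated global tempo in BPM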
flask/Emotion_spotting_service.py
ADDED
@@ -0,0 +1,65 @@
+import tensorflow.keras as keras
+import numpy as np
+import librosa
+import random
+#import tensorflow_addons
+SAMPLE_RATE = 22050
+
+class _Emotion_spotting_service():
+    model = None
+    #instance = None
+    mapping = [' amazement', ' solemnity', ' tenderness',
+               ' nostalgia', ' calmness', ' power',
+               ' joyfulness', ' tension', ' sadness']
+
+    def __init__(self, model_path):
+        self.model = keras.models.load_model(model_path)
+    def predict(self, file_path):
+        log_spectrogram = self.preprocess(file_path)
+        X = np.array(log_spectrogram).astype("float32")
+        X = np.expand_dims(X, axis=0)
+        # Do predictions
+        num_predictions = self.model.predict(X)
+        prediction = np.argmax(num_predictions)
+        predicted_keyword = self.mapping[prediction]
+        return predicted_keyword
+
+    # Split audio into 10 second excerpts
+    # Attain log spectrogram with following parameters
+    # sample rate = 22050, n_fft = 2048, hop_length = 512
+    # output, 1024*431
+    def preprocess(self, file_path):
+        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
+        signal_normalized = librosa.util.normalize(signal)
+        len_to_check = 10 * 22050
+        # If audio is less than 10 seconds, we pad it with zeroes
+        # If audio is more than 10 seconds, we split into segments and randomly choose one
+        if len(signal_normalized) < len_to_check:
+            num_zeros = len_to_check - len(signal_normalized)
+            signal_normalized = signal_normalized + [0] * num_zeros
+        elif len(signal_normalized) > len_to_check:
+            num_segments = len(signal_normalized)//len_to_check
+            segments = []
+            for i in range(num_segments):
+                start = i * len_to_check
+                end = start + len_to_check
+                if len(signal[start:end]) != len_to_check:
+                    continue
+                else:
+                    segments.append(signal[start:end])
+            signal_normalized = random.choice(segments)
+        stft = librosa.stft(signal_normalized, n_fft=2048, hop_length=512)[:-1]
+        spectrogram = np.abs(stft)
+        log_spectrogram = librosa.amplitude_to_db(spectrogram)
+        return log_spectrogram
+
+# def Emotion_spotting_service():
+#     if _Emotion_spotting_service.instance == None:
+#         _Emotion_spotting_service.instance = _Emotion_spotting_service()
+#         _Emotion_spotting_service.model = keras.models.load_model("ERM.h5")
+#     return _Emotion_spotting_service.instance
+
+# if __name__ == "__main__":
+#     emotion_service = _Emotion_spotting_service("emotion_model.h5")
+#     predicted_word = emotion_service.predict("10.mp3")
+#     print(predicted_word)
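One caveat in preprocess: for clips shorter than 10 seconds, signal_normalized + [0] * num_zeros asks NumPy for element-wise addition of mismatched shapes and raises a broadcasting error rather than appending zeros. A sketch of padding by concatenation instead, under the same 22050 Hz assumption:

import numpy as np

signal = np.zeros(5 * 22050, dtype=np.float32)   # hypothetical 5-second clip
len_to_check = 10 * 22050
num_zeros = len_to_check - len(signal)
# Concatenation appends zeros; ndarray + list attempts element-wise addition
padded = np.concatenate([signal, np.zeros(num_zeros, dtype=signal.dtype)])
assert padded.shape[0] == len_to_check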
flask/Genre_classifier_model.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6572829f9add78efffc0f17a8b0447ce50f84f751b942ba9c7dbf14703603f28
+size 6874520
flask/Genre_spotting_service.py
ADDED
@@ -0,0 +1,73 @@
+import librosa
+import numpy as np
+import tensorflow as tf
+#import argparse
+import json
+import tensorflow.keras as keras
+
+# Input dimension for model = 259*13
+
+mappings = ["blues","classical","country","disco","hip-hop","jazz","metal","pop","reggae","rock"]
+model_path = "Genre_classifier_model.h5"
+class _Genre_spotting_service():
+    instance = None
+    model = None
+    mappings = ["blues", "classical", "country", "disco", "hip-hop", "jazz", "metal", "pop", "reggae", "rock"]
+
+    def __init__(self, model_path):
+        self.model = tf.keras.models.load_model(model_path)
+    def predict(self, file_path):
+        input_data = self.preprocess_audio(file_path)
+        input_data = input_data[np.newaxis, ...]
+        predictions = self.model.predict(input_data)
+        text_predictions = [mappings[np.argmax(predictions, axis=1)[0]]]
+        return text_predictions
+
+    def preprocess_audio(self, file_path, target_frames=259, n_mfcc=13, sr=22050, n_fft=2048, hop_length=512):
+        # Load audio file
+        y, sr = librosa.load(file_path, sr=sr)
+
+        # Compute MFCCs
+        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
+
+        # Pad or truncate MFCCs to ensure they have target_frames
+        if mfccs.shape[1] < target_frames:
+            pad_width = target_frames - mfccs.shape[1]
+            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
+        else:
+            mfccs = mfccs[:, :target_frames]
+        return mfccs.T
+
+
+def Genre_spotting_service():
+    if _Genre_spotting_service.instance == None:
+        _Genre_spotting_service.instance = _Genre_spotting_service()
+        _Genre_spotting_service.model = keras.models.load_model(model_path)
+    return _Genre_spotting_service.instance
+
+# if __name__ == "__main__":
+#     genre_service = _Genre_spotting_service("Genre_classifier_model.h5")
+#     predicted_genre = genre_service.predict("10.mp3")
+#     print(predicted_genre)
+
+
+# file_path = "10.mp3"
+# target_frames=259
+# n_mfcc=13
+# sr=22050
+# n_fft=2048
+# hop_length=512
+# # Load audio file
+# y, sr = librosa.load(file_path, sr=sr)
+#
+# # Compute MFCCs
+# mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
+# print(str(sr))
+#
+# # Pad or truncate MFCCs to ensure they have target_frames
+# if mfccs.shape[1] < target_frames:
+#     pad_width = target_frames - mfccs.shape[1]
+#     mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
+# else:
+#     mfccs = mfccs[:, :target_frames]
+# print(mfccs)
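Note that the factory as committed calls _Genre_spotting_service() with no arguments even though __init__ requires model_path, so invoking it would raise a TypeError; main.py sidesteps this by instantiating the class directly. A sketch of a working factory, assuming the module-level model_path defined above:

def Genre_spotting_service():
    if _Genre_spotting_service.instance is None:
        # Pass the module-level model_path; __init__ loads the model itself
        _Genre_spotting_service.instance = _Genre_spotting_service(model_path)
    return _Genre_spotting_service.instance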
flask/Image_generation_service.py
ADDED
@@ -0,0 +1,20 @@
+from diffusers import StableDiffusionPipeline
+import torch
+
+class _Image_generation_service():
+    instance = None
+    def get_image(self, prompt):
+        pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
+        pipeline.load_lora_weights("../Weights/pytorch_lora_weights.safetensors", weight_name="pytorch_lora_weights.safetensors")
+        image = pipeline(prompt)
+        return image
+
+def Image_generation_service():
+    if _Image_generation_service.instance == None:
+        _Image_generation_service.instance = _Image_generation_service()
+    return _Image_generation_service.instance
+
+# if __name__ == "__main__":
+#     image_service = Image_generation_service()
+#     gen_image = image_service.get_image("A calm piece of music")
+#     print(gen_image)
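As written, get_image rebuilds the Stable Diffusion pipeline and reloads the LoRA weights on every call, and it requires a CUDA device. A sketch that builds the pipeline once, under the same model and weight paths:

from diffusers import StableDiffusionPipeline
import torch

class _Image_generation_service():
    instance = None
    def __init__(self):
        # Load the pipeline once; reused across get_image calls
        self.pipeline = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
        self.pipeline.load_lora_weights("../Weights/pytorch_lora_weights.safetensors",
                                        weight_name="pytorch_lora_weights.safetensors")
    def get_image(self, prompt):
        # The pipeline returns an output object; .images[0] is the PIL image
        return self.pipeline(prompt).images[0]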
flask/__pycache__/Beat_tracking_service.cpython-310.pyc
ADDED
Binary file (946 Bytes).

flask/__pycache__/Beat_tracking_service.cpython-311.pyc
ADDED
Binary file (1.36 kB).

flask/__pycache__/Emotion_spotting_service.cpython-310.pyc
ADDED
Binary file (1.88 kB).

flask/__pycache__/Emotion_spotting_service.cpython-311.pyc
ADDED
Binary file (3.5 kB).

flask/__pycache__/Genre_spotting_service.cpython-310.pyc
ADDED
Binary file (1.91 kB).

flask/__pycache__/Genre_spotting_service.cpython-311.pyc
ADDED
Binary file (2.61 kB).

flask/__pycache__/Image_generation_service.cpython-310.pyc
ADDED
Binary file (1.16 kB).

flask/__pycache__/Image_generation_service.cpython-311.pyc
ADDED
Binary file (1.77 kB).
flask/emotion_model.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f88238ef0a8671880b24d992e435aad8b169717f83231824229c0911253869fd
+size 246095376
main.py
ADDED
@@ -0,0 +1,77 @@
+import streamlit as st
+from flask.Emotion_spotting_service import _Emotion_spotting_service
+from flask.Genre_spotting_service import _Genre_spotting_service
+from flask.Beat_tracking_service import _Beat_tracking_service
+from diffusers import StableDiffusionPipeline
+import torch
+
+emo_list = []
+gen_list = []
+tempo_list = []
+@st.cache_resource
+def load_emo_model():
+    emo_service = _Emotion_spotting_service("flask/emotion_model.h5")
+    return emo_service
+@st.cache_resource
+def load_genre_model():
+    gen_service = _Genre_spotting_service("flask/Genre_classifier_model.h5")
+    return gen_service
+
+@st.cache_resource
+def load_beat_model():
+    beat_service = _Beat_tracking_service()
+    return beat_service
+
+@st.cache_resource
+def load_image_model():
+    pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")
+    pipeline.load_lora_weights("Weights/pytorch_lora_weights.safetensors", weight_name="pytorch_lora_weights.safetensors")
+    return pipeline
+
+
+if 'emotion' not in st.session_state:
+    st.session_state.emotion = None
+
+if 'genre' not in st.session_state:
+    st.session_state.genre = None
+
+if 'beat' not in st.session_state:
+    st.session_state.beat = None
+
+emotion_service = load_emo_model()
+genre_service = load_genre_model()
+beat_service = load_beat_model()
+image_service = load_image_model()
+
+st.title("Music2Image webpage")
+user_input = st.file_uploader("Upload your wav/mp3 files here", type=["wav","mp3"], key="file_uploader")
+st.caption("Generate images from your audio file")
+st.audio(user_input)
+c1,c2,c3 = st.columns([1,1,1])
+with c1:
+    if st.button("Generate emotion"):
+        emotion = emotion_service.predict(user_input)
+        st.session_state.emotion = emotion
+    st.text(st.session_state.emotion)
+with c2:
+    if st.button("Generate genre"):
+        genre = genre_service.predict(user_input)
+        st.session_state.genre = genre
+    st.text(st.session_state.genre)
+
+with c3:
+    if st.button("Generate beat"):
+        beat = beat_service.get_beat(user_input)
+        st.session_state.beat = beat
+    st.text(st.session_state.beat)
+
+if st.session_state.emotion != None and st.session_state.genre != None and st.session_state.beat != None:
+    text_output = None
+    if st.button("Generate text description to be fed into stable diffusion"):
+        st.caption("Text description of your music file")
+        text_output = "This piece of music falls under the " + st.session_state.genre[0] + " genre. It is of tempo " + str(int(st.session_state.beat)) + " and evokes a sense of" + st.session_state.emotion + "."
+        st.text(text_output)
+    if text_output:
+        if st.button("Generate image from text description"):
+            image = image_service(text_output)
+            st.image(image)
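As a Streamlit script, the app is launched with streamlit run main.py. One caveat worth noting: image_service(text_output) calls the diffusers pipeline, which returns an output object rather than an image, so the final display step would typically unwrap it:

result = image_service(text_output)   # diffusers pipeline output object
st.image(result.images[0])            # .images[0] is the generated PIL image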
requirements.txt
ADDED
Binary file (3.83 kB).