nevoit committed on
Commit f5586d9 · 1 Parent(s): 36ac81c

Upload 24 files
compute_score.py ADDED
@@ -0,0 +1,75 @@
"""
This file computes the scores of the generated sentences.
"""
import numpy as np
from numpy import dot
from numpy.linalg import norm
from textblob import TextBlob


def calculate_cosine_similarity_n_gram(all_generated_lyrics, all_original_lyrics, n, word2vec):
    """
    This function computes the similarity between 'n' words that are adjacent to each other.
    :param all_generated_lyrics: list of all generated lyrics
    :param all_original_lyrics: list of all original lyrics
    :param n: size of the grams
    :param word2vec: a dictionary that maps a word to its embedding vector
    :return: mean similarity between all_generated_lyrics and all_original_lyrics
    """
    cos_sim_list = []
    for song_original_lyrics, song_generated_lyrics in zip(all_original_lyrics, all_generated_lyrics):
        if len(song_original_lyrics) != len(song_generated_lyrics):
            raise Exception('The vectors are not equal')
        cos_sim_song_list = []
        for i in range(len(song_original_lyrics) - n + 1):
            starting_index = i
            ending_index = i + n
            n_gram_original = song_original_lyrics[starting_index:ending_index]
            n_gram_generated = song_generated_lyrics[starting_index:ending_index]
            original_vector = np.mean([word2vec[word] for word in n_gram_original], axis=0)
            generated_vector = np.mean([word2vec[word] for word in n_gram_generated], axis=0)
            cos_sim = dot(original_vector, generated_vector) / (norm(original_vector) * norm(generated_vector))
            cos_sim_song_list.append(cos_sim)
        cos_sim_song = np.mean(cos_sim_song_list)
        cos_sim_list.append(cos_sim_song)
    return np.mean(cos_sim_list)


def calculate_cosine_similarity(all_generated_lyrics, all_original_lyrics, word2vec):
    # The similarity between the generated lyrics and the original lyrics.
    cos_sim_list = []
    for song_original_lyrics, song_generated_lyrics in zip(all_original_lyrics, all_generated_lyrics):
        original_vector = np.mean([word2vec[word] for word in song_original_lyrics], axis=0)
        generated_vector = np.mean([word2vec[word] for word in song_generated_lyrics], axis=0)
        cos_sim = dot(original_vector, generated_vector) / (norm(original_vector) * norm(generated_vector))
        cos_sim_list.append(cos_sim)
    return np.mean(cos_sim_list)


def get_polarity_diff(all_generated_lyrics, all_original_lyrics):
    # The polarity score is a float within the range [-1.0, 1.0].
    pol_diff_list = []
    for song_original_lyrics, song_generated_lyrics in zip(all_original_lyrics, all_generated_lyrics):
        generated_lyrics = ' '.join(song_generated_lyrics)
        generated_blob = TextBlob(generated_lyrics)
        original_lyrics = ' '.join(song_original_lyrics)
        original_blob = TextBlob(original_lyrics)
        pol_diff = abs(generated_blob.sentiment.polarity - original_blob.sentiment.polarity)
        pol_diff_list.append(pol_diff)
    return np.mean(pol_diff_list)


def get_subjectivity_diff(all_generated_lyrics, all_original_lyrics):
    # The subjectivity is a float within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is very subjective.
    subj_diff_list = []
    for song_original_lyrics, song_generated_lyrics in zip(all_original_lyrics, all_generated_lyrics):
        generated_lyrics = ' '.join(song_generated_lyrics)
        generated_blob = TextBlob(generated_lyrics)
        original_lyrics = ' '.join(song_original_lyrics)
        original_blob = TextBlob(original_lyrics)
        subj_diff = abs(generated_blob.sentiment.subjectivity - original_blob.sentiment.subjectivity)
        subj_diff_list.append(subj_diff)
    return np.mean(subj_diff_list)


print("Loaded Successfully")
data_loader.py ADDED
@@ -0,0 +1,185 @@
"""
This file manages the loading of the data.
"""
import csv
import os
import pickle
import string

import numpy as np
import pretty_midi


def get_midi_files(midi_pickle, midi_folder, artists, names):
    """
    This function loads the midi files
    :param midi_pickle: path for the pickle file
    :param midi_folder: path for the midi folder
    :param artists: list of artists
    :param names: list of song names
    :return: list of pretty midi objects
    """
    # If the pickle file already exists, read it
    pretty_midi_songs = _read_pickle_if_exists(pickle_path=midi_pickle)
    if pretty_midi_songs is None:  # The pickle does not exist, so build the list from the midi folder
        pretty_midi_songs = []
        lower_upper_files = get_lower_upper_dict(midi_folder)
        if len(artists) != len(names):
            raise Exception('Artists and Names lengths are different.')
        for artist, song_name in zip(artists, names):
            if song_name[0] == " ":
                song_name = song_name[1:]
            song_file_name = f'{artist}_-_{song_name}.mid'.replace(" ", "_")
            if song_file_name not in lower_upper_files:
                print(f'Song {song_file_name} does not exist, even though'
                      f' the song is provided in the training or testing sets')
                continue
            original_file_name = lower_upper_files[song_file_name]
            midi_file_path = os.path.join(midi_folder, original_file_name)
            try:
                pretty_midi_format = pretty_midi.PrettyMIDI(midi_file_path)
                pretty_midi_songs.append(pretty_midi_format)
            except Exception:
                print(f'Exception raised from Mido using this file: {midi_file_path}')

        _save_pickle(pickle_path=midi_pickle, content=pretty_midi_songs)
    return pretty_midi_songs


def get_lower_upper_dict(midi_folder):
    """
    This function maps a lower case file name to its original (possibly upper case) file name
    :param midi_folder: midi folder path
    :return: A dictionary from lower case names to the original file names
    """
    lower_upper_files = {}
    for file_name in os.listdir(midi_folder):
        if file_name.endswith(".mid"):
            lower_upper_files[file_name.lower()] = file_name
    return lower_upper_files


def get_input_sets(input_file, pickle_path, word2vec, midi_folder) -> dict:
    """
    This function loads the training or testing set that was provided by the course staff.
    In addition, some pre-processing steps are applied here.
    :param input_file: training or testing set path
    :param pickle_path: training or testing pickle path
    :param word2vec: dictionary that maps a word to a vector
    :param midi_folder: the midi folder that we use to validate that a song exists
    :return: a dictionary with the artists, song names, and lyrics
    """
    # If the pickle file already exists, read it
    pickle_value = _read_pickle_if_exists(pickle_path=pickle_path)
    # We want only songs with a midi file
    lower_upper_files = get_lower_upper_dict(midi_folder)
    if pickle_value is not None:  # If the pickle exists, convert the list into variables
        artists, names, lyrics = pickle_value[0], pickle_value[1], pickle_value[2]
    else:  # The pickle file does not exist
        artists, names, lyrics = [], [], []
        with open(input_file, newline='') as f:
            lines = csv.reader(f, delimiter=',', quotechar='|')
            for row in lines:
                artist_name = row[0]
                song_name = row[1]
                if song_name[0] == " ":
                    song_name = song_name[1:]
                song_file_name = f'{artist_name}_-_{song_name}.mid'.replace(" ", "_")
                if song_file_name not in lower_upper_files:
                    print(f'Song {song_file_name} does not exist, even though'
                          f' the song is provided in the training or testing sets')
                    continue
                original_file_name = lower_upper_files[song_file_name]
                midi_file_path = os.path.join(midi_folder, original_file_name)
                try:
                    pretty_midi.PrettyMIDI(midi_file_path)
                except Exception:
                    print(f'Exception raised from Mido using this file: {midi_file_path}')
                    continue
                song_lyrics = row[2]
                song_lyrics = song_lyrics.replace('&', '')
                song_lyrics = song_lyrics.replace('  ', ' ')
                song_lyrics = song_lyrics.replace('\'', '')
                song_lyrics = song_lyrics.replace('--', ' ')

                tokens = song_lyrics.split()
                table = str.maketrans('', '', string.punctuation)  # remove punctuation from each token
                tokens = [w.translate(table) for w in tokens]
                tokens = [word for word in tokens if
                          word.isalpha()]  # remove remaining tokens that are not alphabetic
                tokens = [word.lower() for word in tokens if word.lower() in word2vec]  # make lower case
                song_lyrics = ' '.join(tokens)
                artists.append(artist_name)
                names.append(song_name)
                lyrics.append(song_lyrics)
        _save_pickle(pickle_path=pickle_path, content=[artists, names, lyrics])

    return {'artists': artists, 'names': names, 'lyrics': lyrics}


def get_word2vec(word2vec_path, pre_trained, vector_size, encoding='utf-8') -> dict:
    """
    This function returns a dictionary that maps a word to a vector
    :param word2vec_path: path for the pickle file
    :param pre_trained: path for the pre-trained embedding file
    :param vector_size: the vector size for each word
    :param encoding: the encoding of the pre_trained file
    :return: dictionary that maps a word to a vector
    """
    # If the pickle file already exists, read it
    word2vec = _read_pickle_if_exists(word2vec_path)
    if word2vec is None:  # The pickle file does not exist
        with open(pre_trained, 'r', encoding=encoding) as f:  # Read the pre-trained word vectors
            list_of_lines = list(f)
        word2vec = _iterate_over_glove_list(list_of_lines=list_of_lines, vector_size=vector_size)
        _save_pickle(pickle_path=word2vec_path, content=word2vec)  # Save a pickle for the next run
    return word2vec


def _iterate_over_glove_list(list_of_lines, vector_size):
    """
    This function iterates over the glove lines one by one and returns a word2vec dictionary
    :param list_of_lines: list of glove lines
    :param vector_size: the size of the embedding vector
    :return: dictionary that maps a word to a vector
    """
    word2vec = {}
    punctuation = string.punctuation
    for line in list_of_lines:
        values = line.split(' ')
        word = values[0]
        if word in punctuation:
            continue
        vec = np.asarray(values[1:], "float32")
        if len(vec) != vector_size:
            raise Warning(f"Vector size is different than {vector_size}")
        else:
            word2vec[word] = vec
    return word2vec


def _save_pickle(pickle_path, content):
    """
    This function saves a value to a pickle file
    :param pickle_path: path for the pickle file
    :param content: the value you want to save
    :return: Nothing
    """
    with open(pickle_path, 'wb') as f:
        pickle.dump(content, f)


def _read_pickle_if_exists(pickle_path):
    """
    This function reads a pickle file
    :param pickle_path: path for the pickle file
    :return: the saved value in the pickle file, or None if the file does not exist
    """
    pickle_file = None
    if os.path.exists(pickle_path):
        with open(pickle_path, 'rb') as f:
            pickle_file = pickle.load(f)
    return pickle_file


print('Loaded Successfully')
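The two private helpers above implement a simple compute-once cache that the rest of the repository leans on. A minimal sketch of the pattern, assuming `data_loader.py` is importable; `cached` and the lambda are hypothetical stand-ins for an expensive build step, not part of this repository:

```python
from data_loader import _read_pickle_if_exists, _save_pickle

def cached(pickle_path, build_fn):
    """Return the pickled value if it exists; otherwise build it once and cache it."""
    value = _read_pickle_if_exists(pickle_path=pickle_path)
    if value is None:
        value = build_fn()
        _save_pickle(pickle_path=pickle_path, content=value)
    return value

# Hypothetical expensive step, used only for illustration.
vocab = cached('vocab.pkl', lambda: {'hello': 0, 'world': 1})
print(vocab)
```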
experiment.py ADDED
@@ -0,0 +1,519 @@
"""
This file manages the experiments; see the main function for changing the settings.
"""
import os
import random
import time

import numpy as np
import pandas as pd
from gtts import gTTS
from keras_preprocessing.text import Tokenizer


def main():
    """
    This function runs the process of the experiments. It iterates over the parameters and outputs the results.
    :return: Nothing
    """
    # Some settings for the files we will use
    saved_file_type = 'pkl'
    midi_pickle = os.path.join(PICKLES_FOLDER, f"midi.{saved_file_type}")
    midi_folder = os.path.join(DATA_PATH, "midi_files")

    # Read a pre-trained word2vec dictionary
    word2vec_path = os.path.join(PICKLES_FOLDER, f"{WORD2VEC_FILENAME}.{saved_file_type}")
    pre_trained = os.path.join(INPUT_FOLDER, f"{GLOVE_FILE_NAME}.txt")

    # Get the embedding dictionary that maps a word to a vector
    word2vec = get_word2vec(word2vec_path=word2vec_path,
                            pre_trained=pre_trained,
                            vector_size=VECTOR_SIZE,
                            encoding=ENCODING)

    # Load the training and testing sets that were provided by the course staff
    train_pickle_path = os.path.join(PICKLES_FOLDER, f'{TRAIN_NAME}.{saved_file_type}')
    input_train_path = os.path.join(INPUT_FOLDER, INPUT_TRAINING_SET)
    training_set = get_input_sets(input_file=input_train_path,
                                  pickle_path=train_pickle_path,
                                  word2vec=word2vec,
                                  midi_folder=midi_folder)
    test_pickle_path = os.path.join(PICKLES_FOLDER, f'{TEST_NAME}.{saved_file_type}')
    input_test_path = os.path.join(INPUT_FOLDER, INPUT_TESTING_SET)
    testing_set = get_input_sets(input_file=input_test_path,
                                 pickle_path=test_pickle_path,
                                 word2vec=word2vec,
                                 midi_folder=midi_folder)

    artists = training_set['artists'] + testing_set['artists']
    songs_names = training_set['names'] + testing_set['names']
    lyrics = training_set['lyrics'] + testing_set['lyrics']

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lyrics)
    total_words = len(tokenizer.word_index) + 1

    encoded_lyrics_list = tokenizer.texts_to_sequences(lyrics)
    index2word = tokenizer.index_word

    melodies = get_midi_files(midi_folder=midi_folder,
                              midi_pickle=midi_pickle,
                              artists=artists,
                              names=songs_names)

    train_encoded_lyrics_list = encoded_lyrics_list[:len(training_set['lyrics'])]
    test_encoded_lyrics_list = encoded_lyrics_list[len(training_set['lyrics']):]
    melody_pickle = os.path.join(PICKLES_FOLDER, "melody_data." + saved_file_type)

    comb_dict = {'seed': [], 'seq_length': [], 'learning_rate': [], 'batch_size': [], 'epochs': [],
                 'patience': [], 'min_delta': [], 'melody_method': [], 'model_names': [], 'cos_sim_1_gram': [],
                 'cos_sim_2_gram': [],
                 'cos_sim_3_gram': [], 'cos_sim_5_gram': [], 'cos_sim_max_gram': [], 'polarity_diff': [],
                 'subjectivity_diff': [], 'loss_val': [], 'accuracy': []}

    word2vec_matrix = get_word2vec_matrix(total_words=total_words,
                                          index2word=index2word,
                                          word2vec=word2vec,
                                          vector_size=VECTOR_SIZE)

    for seed in seeds_list:
        for sl in seq_length_list:
            sets_dict = create_sets(
                train_encoded_lyrics_list=train_encoded_lyrics_list,
                test_encoded_lyrics_list=test_encoded_lyrics_list,
                total_words=total_words,
                seq_length=sl,
                validation_set_size=VALIDATION_SET_SIZE,
                seed=seed)
            training_sequences = sets_dict['train'][1].shape[0] + sets_dict['validation'][1].shape[0]
            for melody_method in melody_extraction:
                m_train, m_val, m_test = get_melody_data_sets(
                    train_num=training_sequences,
                    val_size=VALIDATION_SET_SIZE,
                    melodies_list=melodies,
                    sequence_length=sl,
                    encoded_lyrics_matrix=encoded_lyrics_list,
                    pkl_file_path=melody_pickle,
                    seed=seed,
                    feature_method=melody_method)
                melody_feature_vector_size = m_train.shape[2]
                for l in learning_rate_list:
                    for bs in batch_size_list:
                        for ep in epochs_list:
                            for pa in patience_list:
                                for md in min_delta_list:
                                    for u in units_list:
                                        for m_name in model_names_list:
                                            run_combination(comb_dict, sl, bs, ep, index2word, l, md, pa, seed,
                                                            testing_set['artists'], melody_method,
                                                            testing_set['lyrics'], testing_set['names'], total_words, u,
                                                            word2vec,
                                                            word2vec_matrix, tokenizer, sets_dict['train'][0],
                                                            sets_dict['validation'][0], sets_dict['test'][0], m_train,
                                                            m_val, m_test, sets_dict['train'][1],
                                                            sets_dict['validation'][1], sets_dict['test'][1], m_name,
                                                            melody_feature_vector_size)
                                            if m_name == 'lyrics':
                                                break
    # Here we save all the results to a csv file
    comb_df = pd.DataFrame.from_dict(comb_dict)
    comb_df.to_csv(COMB_PATH, index=False)


def run_combination(comb_dict, seq_length, batch_size, epochs, index2word, learning_rate, min_delta, patience, seed,
                    test_artists, melody_extraction_method,
                    test_lyrics, test_names, total_words, units, word2vec, word2vec_matrix, tokenizer, x_train,
                    x_val, x_test, m_train, m_val, m_test, y_train, y_val, y_test, model_name, melody_num_features):
    """
    This function runs a single combination of specific settings on the training and testing sets
    :param melody_extraction_method: the method used to extract melody features (naive or with meta data)
    :param comb_dict: dictionary of all the results
    :param seq_length: the input sequence length we used for the LSTM model
    :param batch_size: the batch size for the model
    :param epochs: number of epochs for the model
    :param index2word: a dictionary that maps an index to a word
    :param learning_rate: learning rate for the model
    :param min_delta: minimum delta for the early stopping of the model
    :param patience: patience for the early stopping of the model
    :param seed: for the random state
    :param test_artists: list of artists in the testing set
    :param test_lyrics: list of lyrics in the testing set
    :param test_names: list of song names in the testing set
    :param total_words: total size of the vocabulary
    :param units: number of LSTM units
    :param word2vec: dictionary that maps a word to a vector
    :param word2vec_matrix: a matrix of words (rows) and vectors (columns) of the word2vec
    :param tokenizer: Tokenizer object
    :param x_train: lyrics training set
    :param x_val: lyrics validation set
    :param x_test: lyrics testing set
    :param m_train: melody training set
    :param m_val: melody validation set
    :param m_test: melody testing set
    :param y_train: training output words
    :param y_val: validation output words
    :param y_test: testing output words
    :param model_name: the name of the model we want to use in this function
    :param melody_num_features: size of the melody vector
    :return: Nothing
    """
    model_save_type = 'h5'  # file type for the saved weights
    initialize_seed(seed)
    parameters_name = f'seq_lens_{seq_length}_seed_{seed}_u_{units}_lr_{learning_rate}_bs_{batch_size}_ep_{epochs}_' \
                      f'val_{VALIDATION_SET_SIZE}_pa_{patience}_md_{min_delta}_mn_{model_name}'
    if not model_name == 'lyrics':
        parameters_name += f'_fm_{melody_extraction_method}'
    # A path for the weights
    load_weights_path = os.path.join(WEIGHTS_FOLDER, f'weights_{parameters_name}.{model_save_type}')
    model = None
    if model_name == 'lyrics':
        model = LSTMLyrics(seed=seed,
                           loss=LOSS,
                           metrics=METRICS,
                           optimizer=OPTIMIZER,
                           learning_rate=learning_rate,
                           total_words=total_words,
                           seq_length=seq_length,
                           vector_size=VECTOR_SIZE,
                           word2vec_matrix=word2vec_matrix,
                           units=units)
    elif model_name == 'melodies_lyrics':
        x_train = [x_train, m_train]
        x_val = [x_val, m_val]
        x_test = [x_test, m_test]
        model = LSTMLyricsMelodies(seed=seed,
                                   loss=LOSS,
                                   metrics=METRICS,
                                   optimizer=OPTIMIZER,
                                   learning_rate=learning_rate,
                                   total_words=total_words,
                                   seq_length=seq_length,
                                   vector_size=VECTOR_SIZE,
                                   word2vec_matrix=word2vec_matrix,
                                   units=units,
                                   melody_num_features=melody_num_features)
    model.fit(weights_file=load_weights_path,
              batch_size=batch_size,
              epochs=epochs,
              patience=patience,
              min_delta=min_delta,
              x_train=x_train,
              y_train=y_train,
              x_val=x_val,
              y_val=y_val)
    loss_val, accuracy = model.evaluate(x_test=x_test, y_test=y_test, batch_size=batch_size)
    print(f'Loss on Testing set: {loss_val}')
    print(f'Accuracy on Testing set: {accuracy}')
    all_original_lyrics, all_generated_lyrics = generate_lyrics(
        model_name=model_name,
        word_index=index2word,
        seq_length=seq_length,
        model=model,
        tokenizer=tokenizer,
        artists=test_artists,
        lyrics=test_lyrics,
        names=test_names,
        word2vec=word2vec,
        melodies=m_test
    )
    cos_sim_1_gram = calculate_cosine_similarity_n_gram(all_generated_lyrics=all_generated_lyrics,
                                                        all_original_lyrics=all_original_lyrics,
                                                        n=1,
                                                        word2vec=word2vec)
    print(f'Mean Cosine Similarity (1-gram): {cos_sim_1_gram}')
    cos_sim_2_gram = calculate_cosine_similarity_n_gram(all_generated_lyrics=all_generated_lyrics,
                                                        all_original_lyrics=all_original_lyrics,
                                                        n=2,
                                                        word2vec=word2vec)
    print(f'Mean Cosine Similarity (2-gram): {cos_sim_2_gram}')
    cos_sim_3_gram = calculate_cosine_similarity_n_gram(all_generated_lyrics=all_generated_lyrics,
                                                        all_original_lyrics=all_original_lyrics,
                                                        n=3,
                                                        word2vec=word2vec)
    print(f'Mean Cosine Similarity (3-gram): {cos_sim_3_gram}')
    cos_sim_5_gram = calculate_cosine_similarity_n_gram(all_generated_lyrics=all_generated_lyrics,
                                                        all_original_lyrics=all_original_lyrics,
                                                        n=5,
                                                        word2vec=word2vec)
    print(f'Mean Cosine Similarity (5-gram): {cos_sim_5_gram}')
    cos_sim = calculate_cosine_similarity(all_generated_lyrics=all_generated_lyrics,
                                          all_original_lyrics=all_original_lyrics,
                                          word2vec=word2vec)
    print(f'Mean Cosine Similarity (Max-gram): {cos_sim}')
    pol_dif = get_polarity_diff(all_generated_lyrics=all_generated_lyrics, all_original_lyrics=all_original_lyrics)
    print(f'Mean Polarity Difference: {pol_dif}')
    subj_dif = get_subjectivity_diff(all_generated_lyrics=all_generated_lyrics, all_original_lyrics=all_original_lyrics)
    print(f'Mean Subjectivity Difference: {subj_dif}')
    update_comb_dict(batch_size, comb_dict, cos_sim, cos_sim_1_gram, cos_sim_2_gram, cos_sim_3_gram, cos_sim_5_gram,
                     epochs, learning_rate, min_delta, model_name, patience, pol_dif, seed, seq_length, subj_dif,
                     melody_extraction_method, loss_val, accuracy)


def update_comb_dict(batch_size, comb_dict, cos_sim, cos_sim_1_gram, cos_sim_2_gram, cos_sim_3_gram, cos_sim_5_gram,
                     epochs, learning_rate, min_delta, model_name, patience, pol_dif, seed, seq_length, subj_dif,
                     melody_extraction_method, loss_val, accuracy):
    """
    This function updates the combination dictionary that is written to the csv
    :param accuracy: accuracy on the testing set
    :param loss_val: loss on the testing set
    :param batch_size: the batch size for the model
    :param comb_dict: the results dictionary
    :param cos_sim: the similarity score between the original and the generated sentence
    :param cos_sim_1_gram: the similarity score between each 1-gram of the original and the generated sentence
    :param cos_sim_2_gram: the similarity score between each 2-gram of the original and the generated sentence
    :param cos_sim_3_gram: the similarity score between each 3-gram of the original and the generated sentence
    :param cos_sim_5_gram: the similarity score between each 5-gram of the original and the generated sentence
    :param epochs: number of epochs for the model
    :param learning_rate: learning rate for the model
    :param min_delta: minimum delta for the early stopping of the model
    :param model_name: the model name we want to test
    :param patience: patience for the early stopping of the model
    :param pol_dif: the difference in polarity score between the original and the generated sentence
    :param seed: for the random state
    :param seq_length: length of the given sequences
    :param subj_dif: the difference in subjectivity score between the original and the generated sentence
    :param melody_extraction_method: the method used to extract melody features (naive or with meta data)
    :return: Nothing
    """
    comb_dict['seed'].append(seed)
    comb_dict['seq_length'].append(seq_length)
    comb_dict['learning_rate'].append(learning_rate)
    comb_dict['batch_size'].append(batch_size)
    comb_dict['epochs'].append(epochs)
    comb_dict['patience'].append(patience)
    comb_dict['min_delta'].append(min_delta)
    comb_dict['model_names'].append(model_name)
    comb_dict['cos_sim_1_gram'].append(cos_sim_1_gram)
    comb_dict['cos_sim_2_gram'].append(cos_sim_2_gram)
    comb_dict['cos_sim_3_gram'].append(cos_sim_3_gram)
    comb_dict['cos_sim_5_gram'].append(cos_sim_5_gram)
    comb_dict['cos_sim_max_gram'].append(cos_sim)
    comb_dict['polarity_diff'].append(pol_dif)
    comb_dict['subjectivity_diff'].append(subj_dif)
    comb_dict['melody_method'].append(melody_extraction_method)
    comb_dict['loss_val'].append(loss_val)
    comb_dict['accuracy'].append(accuracy)


def generate_song_given_sequence(model_name, model, tokenizer, seed_words, vector_of_indices, required_length, artist,
                                 name, index_value, melodies_song):
    """
    This function generates a new song
    :param model_name: model name
    :param melodies_song: a matrix that contains the melodies of this song
    :param model: the trained model used for the prediction
    :param tokenizer: the Tokenizer object
    :param seed_words: the seed text the generated song starts from
    :param vector_of_indices: the encoded seed sequence (word indices)
    :param required_length: the number of words to generate
    :param artist: the artist name (used for the output file name)
    :param name: the song name (used for the output file name)
    :param index_value: the index of the seed (used for the output file name)
    :return: the generated lyrics as a single string
    """
    new_song_lyrics: list = [seed_words]
    for word_i in range(required_length):
        if model_name == 'lyrics':  # Different input for lyrics alone and lyrics with melodies
            voc_prob = model.predict(vector_of_indices)
        else:
            melody_seq = np.expand_dims(a=melodies_song[word_i], axis=0)
            voc_prob = model.predict([vector_of_indices, melody_seq])
        voc_prob = voc_prob.T  # Transpose the array
        word_index_array = np.arange(voc_prob.size)
        # This line selects a word based on the predicted probabilities
        index_of_selected_word = random.choices(word_index_array, k=1, weights=voc_prob)
        selected_word = find_word_by_index(word_index=index_of_selected_word[0], tokenizer=tokenizer)
        index_of_selected_word_array = np.array(index_of_selected_word).reshape(1, 1)
        vector_of_indices = np.append(vector_of_indices, index_of_selected_word_array, axis=1)
        remove_index = 0
        vector_of_indices = np.delete(vector_of_indices, remove_index, 1)  # Slide the window: drop the oldest index
        new_song_lyrics.append(selected_word)
    final_text = ' '.join(new_song_lyrics)
    if WRITE_TO_MP3:
        lyrics_to_mp3 = gTTS(text=final_text, lang='en', slow=False)
        lyrics_to_mp3.save(os.path.join(OUTPUT_FOLDER, f"{artist}_{name}_{index_value}.mp3"))
    return final_text


def find_word_by_index(word_index, tokenizer):
    """
    This function returns the word at the given index
    :param word_index: the index of the word we want to find
    :param tokenizer: the Tokenizer object
    :return: the word at that index
    """
    for word, index in tokenizer.word_index.items():
        if index == word_index:
            return word


def generate_lyrics(model_name, word_index, seq_length, model, tokenizer, artists, lyrics, names,
                    word2vec, melodies) -> (list, list):
    """
    This function creates lyrics for each song in the testing set
    :param melodies: a 3D array that maps each sequence to its melody features (a 2D array of (sequence size, melody vector))
    :param model_name: the model name we want to test
    :param word_index: a dictionary that maps an index to a word
    :param seq_length: length of the given sequences
    :param model: the learned model
    :param tokenizer: the tokenizer object
    :param artists: list of artists in the testing set
    :param lyrics: list of lyrics in the testing set
    :param names: list of song names in the testing set
    :param word2vec: a dictionary that maps a word to an embedding vector
    :return: lists of the original and the generated songs
    """
    all_original_lyrics = []
    all_generated_lyrics = []
    start_index_melody = 0
    for artist, name, song_lyrics in zip(artists, names, lyrics):
        print('-' * 100)
        print(f'Original lyrics for {artist} - {name} are: "{song_lyrics}"')
        relevant_words_in_song = []
        find_relevant_words(song_lyrics, relevant_words_in_song, word2vec)
        number_of_seq = len(relevant_words_in_song) - seq_length + 1
        end_index_melody = start_index_melody + number_of_seq
        melodies_song = melodies[start_index_melody:end_index_melody, :, :]
        required_length = len(relevant_words_in_song) - (seq_length * TESTING_SEED_TEXT_PER_SONG)
        for seed_index in range(TESTING_SEED_TEXT_PER_SONG):
            # We select TESTING_SEED_TEXT_PER_SONG different words/sentences as seeds for the new song
            starting_index = 0 + seed_index * seq_length
            ending_index = starting_index + seq_length
            song_first_word_in_word2vec = relevant_words_in_song[starting_index:ending_index]
            song_first_indices = []
            for word in song_first_word_in_word2vec:
                word_i = [k for k, v in word_index.items() if v == word][0]
                song_first_indices.append(word_i)
            encoded_test = np.asarray(song_first_indices).reshape((1, seq_length))
            seed_text = ' '.join(song_first_word_in_word2vec)
            generated_text = generate_song_given_sequence(model_name, model, tokenizer, seed_text, encoded_test,
                                                          required_length, artist, name, seed_index, melodies_song)
            gen_list = generated_text.split(' ')
            all_generated_lyrics.append(gen_list.copy()[seq_length:])
            original_starting_index = starting_index + seq_length
            original_ending_index = original_starting_index + required_length
            original_lyrics = relevant_words_in_song[original_starting_index:original_ending_index]
            all_original_lyrics.append(original_lyrics)
            gen_list.insert(seq_length, '\n')
            generated_text = ' '.join(gen_list)
            print(f'Seed text: {generated_text}, required {required_length} words')
            print('-' * 100)
        start_index_melody = end_index_melody + 1
    return all_original_lyrics, all_generated_lyrics


def find_relevant_words(lyrics, selected_words, word2vec):
    """
    This function selects all the words in the lyrics that exist in the pre-defined word2vec
    :param lyrics: the song lyrics as a single string
    :param selected_words: the output list, filled in place
    :param word2vec: a dictionary that maps a word to an embedding vector
    :return: Nothing (selected_words is filled in place)
    """
    for word in lyrics.split():
        if word in word2vec and word not in selected_words:
            selected_words.append(word)


def initialize_seed(seed):
    """
    Initialize all relevant environments with the seed.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)


def folder_exists(path):
    """
    This function checks whether the folder path exists and creates the folder if it does not.
    :param path: folder path
    """
    if not os.path.exists(path):
        os.mkdir(path)


if __name__ == '__main__':
    # Environment settings
    IS_COLAB = (os.name == 'posix')
    LOAD_DATA = not (os.name == 'posix')
    path_separator = os.path.sep

    IS_EXPERIMENT = False
    WRITE_TO_MP3 = False
    if IS_COLAB:
        # The Google Drive folder we used
        DATA_PATH = os.path.join(os.path.sep, 'content', 'drive', 'My Drive', 'datasets', 'midi')
        IS_EXPERIMENT = True
    else:
        # Locally
        from data_loader import get_word2vec
        from data_loader import get_input_sets
        from data_loader import get_midi_files
        from lstm_lyrics import LSTMLyrics
        from lstm_melodies_lyrics import LSTMLyricsMelodies
        from prepare_data import get_word2vec_matrix
        from prepare_data import create_sets
        from compute_score import calculate_cosine_similarity
        from compute_score import get_polarity_diff
        from compute_score import get_subjectivity_diff
        from compute_score import calculate_cosine_similarity_n_gram
        from extract_melodies_features import *

        DATA_PATH = os.path.join('.\\', 'midi')
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # PATHS
    TRAIN_NAME = 'train'
    INPUT_TRAINING_SET = f"lyrics_{TRAIN_NAME}_set.csv"
    TEST_NAME = 'test'
    INPUT_TESTING_SET = f"lyrics_{TEST_NAME}_set.csv"
    OUTPUT_FOLDER = os.path.join(DATA_PATH, 'output_files')
    folder_exists(OUTPUT_FOLDER)
    INPUT_FOLDER = os.path.join(DATA_PATH, 'input_files')
    folder_exists(INPUT_FOLDER)
    PICKLES_FOLDER = os.path.join(DATA_PATH, 'pickles')
    folder_exists(PICKLES_FOLDER)
    WEIGHTS_FOLDER = os.path.join(DATA_PATH, 'weights')
    folder_exists(WEIGHTS_FOLDER)
    WORD2VEC_FILENAME = 'word2vec'
    RESULTS_FILE_NAME = 'results.csv'
    COMB_PATH = os.path.join(OUTPUT_FOLDER, RESULTS_FILE_NAME)
    GLOVE_FILE_NAME = 'glove.6B.300d'
    ENCODING = 'utf-8'

    LOSS = 'categorical_crossentropy'
    METRICS = ['accuracy']
    VECTOR_SIZE = 300
    VALIDATION_SET_SIZE = 0.2
    TESTING_SEED_TEXT_PER_SONG = 3
    OPTIMIZER = 'adam'

    if IS_EXPERIMENT:  # Experiment settings
        seeds_list = [0]
        learning_rate_list = [0.01]
        batch_size_list = [32, 64]
        epochs_list = [10]
        patience_list = [0]
        min_delta_list = [0.1]
        units_list = [256]
        seq_length_list = [1, 5, 20]
        model_names_list = ['melodies_lyrics', 'lyrics']
        melody_extraction = ['naive']
        # melody_extraction = ['naive', 'with_meta_features']
    else:  # Final settings
        seeds_list = [0]
        learning_rate_list = [0.01]
        batch_size_list = [32]
        epochs_list = [10]
        patience_list = [0]
        min_delta_list = [0.1]
        units_list = [256]
        seq_length_list = [1]
        model_names_list = ['melodies_lyrics']
        melody_extraction = ['naive']
        # model_names_list = ['melodies_lyrics', 'lyrics']
        # melody_extraction = ['naive', 'with_meta_features']

    start_time = time.time()
    main()
    print("--- %s seconds ---" % (time.time() - start_time))
extract_melodies_features.py ADDED
@@ -0,0 +1,179 @@
import os
import pickle

import numpy as np
from tqdm import tqdm

# Environment settings
IS_COLAB = (os.name == 'posix')
LOAD_DATA = not (os.name == 'posix')

if not IS_COLAB:
    from prepare_data import create_validation_set


def get_midi_file_instrument_data(word_idx, time_per_word, midi_file):
    """
    Extract data about the midi file in the given time period. We extract the number of beat changes, the instruments
    used, and the velocity.
    :param word_idx: index of the word in the song
    :param time_per_word: average time per word in the song
    :param midi_file: the midi file
    :return: An array where each cell contains some data about the pitch, velocity etc.
    """
    # Features we want to extract:
    start_time = word_idx * time_per_word
    end_time = start_time + time_per_word
    avg_velocity, avg_pitch, num_of_instruments, num_of_notes, beat_changes, has_drums = 0, 0, 0, 0, 0, 0

    for beat in midi_file.get_beats():
        if start_time <= beat <= end_time:
            beat_changes += 1  # Count beats that are in the desired time frame
        elif beat > end_time:
            break  # We passed the final possible time
    for instrument in midi_file.instruments:
        in_range = False  # Will become true if the instrument contributed at least 1 note to this sequence
        for note in instrument.notes:
            if start_time <= note.start:
                if note.end <= end_time:  # In the required range
                    has_drums = 1 if instrument.is_drum else has_drums
                    in_range = True
                    num_of_notes += 1
                    avg_pitch += note.pitch
                    avg_velocity += note.velocity
                else:  # We passed the last relevant note
                    break
        if in_range:
            num_of_instruments += 1
    if num_of_notes > 0:  # If there was at least 1 note
        avg_velocity /= num_of_notes
        avg_pitch /= num_of_notes
    final_features = np.array([avg_velocity, avg_pitch, num_of_instruments, beat_changes, has_drums])
    return final_features


def extract_melody_features_1(melodies_list, sequence_length, encoded_song_lyrics):
    """
    First function for extracting features about the midi files. Using the instrument objects in each midi file we can
    see when each instrument was used and with what velocity. We can then calculate the average pitch and velocity for
    each word in the song.

    :param melodies_list: A list of midi files. Typically contains the training / validation / test set.
    :param sequence_length: Number of words per sequence.
    :param encoded_song_lyrics: A list where each cell represents a song. The cells contain a list of ints, where each
    cell corresponds to a word in the song's lyrics and the value is the index of the word in our word2vec vocabulary.
    :return: A 3d numpy array where the first axis is the number of sequences in the data, the 2nd is the sequence
    length and the third is the melody feature vector for that particular word in that sequence.
    """

    final_features = []
    print('Extracting melody features v1..')

    for idx, midi_file in tqdm(enumerate(melodies_list)):
        num_of_words_in_song = len(encoded_song_lyrics[idx])
        midi_file.remove_invalid_notes()
        time_per_word = midi_file.get_end_time() / num_of_words_in_song  # Average time per word in the lyrics
        number_of_sequences = num_of_words_in_song - sequence_length
        features_during_lyric = []
        for word_idx in range(num_of_words_in_song):  # Iterate over every word and get the features for it
            instrument_data = get_midi_file_instrument_data(word_idx, time_per_word, midi_file)
            features_during_lyric.append(instrument_data)

        for sequence_num in range(number_of_sequences):
            seq = features_during_lyric[sequence_num:sequence_num + sequence_length]  # Create a sequence from the notes
            final_features.append(seq)

    final_features = np.array(final_features)
    return final_features


def extract_melody_features_2(melodies_list, sequence_length, encoded_song_lyrics):
    """
    Using all midi files and lyrics, extract features for all sequences. This is the second method we'll try. We take
    the piano roll matrix for each song: a matrix that displays which notes were played during every user-defined time
    period, with a number representing the velocity. In our case, we slice the song every 1/50 seconds
    (20 milliseconds) and look at what notes were played during this time. This is in addition to the features
    used in v1.
    :param melodies_list: A list of midi files. Typically contains the training / validation / test set.
    :param sequence_length: Number of words per sequence.
    :param encoded_song_lyrics: A list where each cell represents a song. The cells contain a list of ints, where each
    cell corresponds to a word in the song's lyrics and the value is the index of the word in our word2vec vocabulary.
    :return: A 3d numpy array where the first axis is the number of sequences in the data, the 2nd is the sequence
    length and the third is the melody feature vector for that particular word in that sequence.
    """

    final_features = []
    print('Extracting melody features v2..')
    frequency_sample = 50
    for midi_idx, midi_file in tqdm(enumerate(melodies_list)):
        num_of_words_in_song = len(encoded_song_lyrics[midi_idx])
        midi_file.remove_invalid_notes()
        time_per_word = midi_file.get_end_time() / num_of_words_in_song  # Average time per word in the lyrics
        number_of_sequences = num_of_words_in_song - sequence_length
        piano_roll = midi_file.get_piano_roll(fs=frequency_sample)
        num_of_notes_per_word = int(piano_roll.shape[1] / num_of_words_in_song)  # Piano roll columns per word
        features_during_lyric = []
        for word_idx in range(num_of_words_in_song):  # Iterate over every word and get the features for it
            notes_features = extract_piano_roll_features(num_of_notes_per_word, piano_roll, word_idx)
            instrument_data = get_midi_file_instrument_data(word_idx, time_per_word, midi_file)
            features = np.append(notes_features, instrument_data, axis=0)  # Concatenate them
            features_during_lyric.append(features)

        for sequence_num in range(number_of_sequences):
            # Create the features per sequence
            sequence_features = features_during_lyric[sequence_num:sequence_num + sequence_length]
            final_features.append(sequence_features)

    final_features = np.array(final_features)
    return final_features


def extract_piano_roll_features(num_of_notes_per_word, piano_roll, word_idx):
    """
    Sum the piano roll columns that belong to the given word into a single feature vector.
    """
    start_idx = word_idx * num_of_notes_per_word
    end_idx = start_idx + num_of_notes_per_word
    piano_roll_for_lyric = piano_roll[:, start_idx:end_idx].transpose()
    piano_roll_slice_sum = np.sum(piano_roll_for_lyric, axis=0)  # Sum each column into a single cell
    return piano_roll_slice_sum


def get_melody_data_sets(train_num, val_size, melodies_list, sequence_length, encoded_lyrics_matrix, seed,
                         pkl_file_path, feature_method):
    """
    Creates numpy arrays containing features of the melody for the training, validation and test sets.
    :param feature_method: Method of feature extraction to use: 'naive' or 'with_meta_features'.
    :param seed: Seed for splitting to train and test.
    :param pkl_file_path: the file path of the pickle file. Used for saving or loading.
    :param train_num: Number of sequences in the whole training set (train + validation)
    :param val_size: Percentage of sequences used for the validation set
    :param melodies_list: All of the training + validation set midi files
    :param sequence_length: Number of words in a sequence
    :param encoded_lyrics_matrix: A list where each cell represents a song. The cells contain a list of ints, where
    each cell corresponds to a word in the song's lyrics and the value is the index of the word in our word2vec
    vocabulary.
    :return: numpy arrays containing features of the melody for the training, validation and test sets.
    """
    file_type = pkl_file_path.split('.')[-1]
    # Save/load the file with the appropriate name according to the settings used:
    pkl_file_path = f'{pkl_file_path[:-len(file_type) - 1]}_{str(feature_method)}_sl_{sequence_length}.{file_type}'
    if os.path.exists(pkl_file_path):  # If the file exists, use it instead of building it again
        with open(pkl_file_path, 'rb') as f:
            melody_train, melody_val, melody_test = pickle.load(f)
        return melody_train, melody_val, melody_test

    if feature_method == 'naive':  # Use the appropriate melody feature method
        melody_features = extract_melody_features_1(melodies_list, sequence_length, encoded_lyrics_matrix)
    else:
        melody_features = extract_melody_features_2(melodies_list, sequence_length, encoded_lyrics_matrix)

    melody_train = melody_features[:train_num]
    melody_test = melody_features[train_num:]
    melody_train, melody_val = create_validation_set(melody_train, val_size, seed)

    with open(pkl_file_path, 'wb') as f:
        pickle.dump([melody_train, melody_val, melody_test], f)
        print('Dumped midi files')

    return melody_train, melody_val, melody_test


print("Loaded Successfully")
figures/1.PNG ADDED
figures/10.PNG ADDED
figures/11.PNG ADDED
figures/12.PNG ADDED
figures/13.PNG ADDED
figures/14.PNG ADDED
figures/15.PNG ADDED
figures/2.PNG ADDED
figures/3.PNG ADDED
figures/4.PNG ADDED
figures/5.PNG ADDED
figures/6.PNG ADDED
figures/7.PNG ADDED
figures/8.PNG ADDED
figures/9.PNG ADDED
lstm_lyrics.py ADDED
@@ -0,0 +1,76 @@
"""
This file manages the LSTM model with lyrics only.
"""
import os

from keras import Input, Model
from keras import backend as K
from keras.layers import Dense, Dropout, Embedding, Bidirectional, LSTM, Masking
from keras.optimizers import Adam

# Environment settings
IS_COLAB = (os.name == 'posix')
LOAD_DATA = not (os.name == 'posix')

if not IS_COLAB:
    from rnn import RecurrentNeuralNetwork


class LSTMLyrics(RecurrentNeuralNetwork):
    def __init__(self, seed, loss, metrics, optimizer, learning_rate, total_words, seq_length, vector_size,
                 word2vec_matrix, units):
        """
        seed - the seed used to initialize the weights
        loss, metrics, optimizer, learning_rate - settings used for compiling the model
        (e.g., 'categorical_crossentropy' and 'adam')
        :return Nothing
        """
        super().__init__(seed)
        K.clear_session()
        self.seed = seed
        self.initialize_seed()
        self.initialize_model(learning_rate, loss, metrics, optimizer, seq_length, total_words, units, vector_size,
                              word2vec_matrix)

    def initialize_model(self, learning_rate, loss, metrics, optimizer, seq_length, total_words, units, vector_size,
                         word2vec_matrix):
        """
        This function initializes the architecture and builds the model
        :param learning_rate: a tuning parameter in an optimization algorithm that determines the step size
        :param loss: the loss function we want to use
        :param metrics: the metrics we want to use, such as accuracy
        :param optimizer: the optimizer function, such as Adam
        :param seq_length: the length of the sequence (the sentence in this case)
        :param total_words: total number of words we have (used for the output dense layer)
        :param units: number of LSTM units
        :param vector_size: the size of the embedding vector
        :param word2vec_matrix: the embedding matrix
        :return: Nothing
        """
        lyrics_features_input = Input((seq_length,))

        embedding_layer = Embedding(input_dim=total_words,  # the size of the vocabulary in the text data
                                    input_length=seq_length,  # the length of input sequences
                                    output_dim=vector_size,
                                    # the size of the vector space in which words will be embedded
                                    weights=[word2vec_matrix],
                                    trainable=False,
                                    # the model must be informed that some part of
                                    # the data is actually padding and should be ignored
                                    mask_zero=True,
                                    name='MelodiesLyrics')(lyrics_features_input)

        masking_layer = Masking(mask_value=0.)(embedding_layer)
        # Bidirectional recurrent layer
        b_rnn_layer = Bidirectional(LSTM(units=units, activation='relu'))(masking_layer)
        dropout_layer = Dropout(0.6)(b_rnn_layer)

        output_dense = Dense(units=total_words, activation='softmax')(dropout_layer)

        self.model = Model(inputs=lyrics_features_input, outputs=output_dense)
        if optimizer == 'adam':
            optimizer = Adam(lr=learning_rate)
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)


print("Loaded Successfully")
lstm_melodies_lyrics.py ADDED
@@ -0,0 +1,79 @@
"""
This file manages the LSTM model with lyrics and melodies.
"""
import os

from keras import Input, Model
from keras import backend as K
from keras.layers import Dense, Dropout, Embedding, Concatenate, Bidirectional, LSTM, Masking
from keras.optimizers import Adam

# Environment settings
IS_COLAB = (os.name == 'posix')
LOAD_DATA = not (os.name == 'posix')

if not IS_COLAB:
    from rnn import RecurrentNeuralNetwork


class LSTMLyricsMelodies(RecurrentNeuralNetwork):
    def __init__(self, seed, loss, metrics, optimizer, learning_rate, total_words, seq_length, vector_size,
                 word2vec_matrix, units, melody_num_features):
        """
        seed - the seed used to initialize the weights
        loss, metrics, optimizer, learning_rate - settings used for compiling the model
        (e.g., 'categorical_crossentropy' and 'adam')
        :return Nothing
        """
        super().__init__(seed)
        K.clear_session()
        self.seed = seed
        self.initialize_seed()
        self.initialize_model(learning_rate, loss, metrics, optimizer, seq_length, total_words, units, vector_size,
                              word2vec_matrix, melody_num_features)

    def initialize_model(self, learning_rate, loss, metrics, optimizer, seq_length, total_words, units, vector_size,
                         word2vec_matrix, melody_num_features):
        """
        This function initializes the architecture and builds the model
        :param melody_num_features: number of the melody features
        :param learning_rate: a tuning parameter in an optimization algorithm that determines the step size
        :param loss: the loss function we want to use
        :param metrics: the metrics we want to use, such as accuracy
        :param optimizer: the optimizer function, such as Adam
        :param seq_length: the length of the sequence (the sentence in this case)
        :param total_words: total number of words we have (used for the output dense layer)
        :param units: number of LSTM units
        :param vector_size: the size of the embedding vector
        :param word2vec_matrix: the embedding matrix
        :return: Nothing
        """
        lyrics_features_input = Input((seq_length,))
        melody_features_input = Input((seq_length, melody_num_features))

        embedding_layer = Embedding(input_dim=total_words,  # the size of the vocabulary in the text data
                                    input_length=seq_length,  # the length of input sequences
                                    output_dim=vector_size,
                                    # the size of the vector space in which words will be embedded
                                    weights=[word2vec_matrix],
                                    trainable=False,
                                    # the model must be informed that some part of
                                    # the data is actually padding and should be ignored
                                    mask_zero=True,
                                    name='MelodiesLyrics')(lyrics_features_input)

        masking_layer = Masking(mask_value=0.)(embedding_layer)
        concatenate_layer = Concatenate(axis=2)([masking_layer, melody_features_input])
        # Bidirectional recurrent layer
        b_rnn_layer = Bidirectional(LSTM(units=units, activation='relu'))(concatenate_layer)
        dropout_layer = Dropout(0.6)(b_rnn_layer)

        output_dense = Dense(units=total_words, activation='softmax')(dropout_layer)

        self.model = Model(inputs=[lyrics_features_input, melody_features_input], outputs=output_dense)
        if optimizer == 'adam':
            optimizer = Adam(lr=learning_rate)
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)


print("Loaded Successfully")
prepare_data.py ADDED
@@ -0,0 +1,112 @@
1
+ """
2
+ This file manages the data preparation
3
+ """
4
+ import numpy as np
5
+
6
+
7
+ def get_word2vec_matrix(total_words, index2word, word2vec, vector_size):
8
+ """
9
+ This function creates a matrix where the rows are the words and the columns represents the embedding vector.
10
+ We will use this matrix in the embedding layer
11
+ :param total_words: Number of words in our word2vec dictionary.
12
+ :param index2word: dictionary maps between index and word
13
+ :param word2vec: dictionary maps between a word and a vector
14
+ :param vector_size: the size of the embedding vector size
15
+ :return: embedding layer
16
+ """
17
+ word2vec_matrix = np.zeros((total_words, vector_size))
18
+ for index_word, word in index2word.items():
19
+ if word not in word2vec:
20
+ print(f'Can not find the word "{word}" in the word2vec dictionary')
21
+ continue
22
+ else:
23
+ vec = word2vec[word]
24
+ word2vec_matrix[index_word] = vec
25
+ return word2vec_matrix
26
+
27
+
28
+ def _create_sequences(encoded_lyrics_list, total_words, seq_length):
29
+ """
30
+ This function creates sequences from the lyrics
31
+ :param encoded_lyrics_list: A list representing all the songs in the dataset (615 songs). Each cell contains a list
32
+ of ints, where each int corresponds to the lyrics in that song. "I'm a barbie girl" --> [23, 52, 189, 792] etc.
33
+ :param total_words: Number of words in our word2vec dictionary.
34
+ :param seq_length: Number of words predating the word to be predicted.
35
+ :return: (1) A numpy array containing all the sequences seen, concatenated.
36
+ (2) A 2d numpy array where each row represents a word and the columns are the possible words in the
37
+ vocabulary. There is a '1' in the corresponding word (e.g, word number '20,392' in the dataset is word
38
+ number '39' in the vocab.
39
+ """
40
+ input_sequences = []
41
+ next_words = []
42
+ for song_sequence in encoded_lyrics_list: # iterate over songs
43
+ for i in range(seq_length, len(song_sequence)): # iterate from minimal sequence length (number of words) to
44
+ start_index = i - seq_length # number of words in the song
45
+ end_index = i
46
+ # Slice the list into the desired sequence length
47
+ sequence = song_sequence[start_index:end_index]
48
+ input_sequences.append(sequence)
49
+ next_word = song_sequence[end_index]
50
+ next_words.append(next_word)
51
+ input_sequences = np.array(input_sequences)
52
+ one_hot_encoding_next_words = convert_to_one_hot_encoding(input_sequences, next_words, total_words)
53
+ return input_sequences, one_hot_encoding_next_words
54
+
55
+
56
+ def convert_to_one_hot_encoding(input_sequences, next_words, total_words):
57
+ """
58
+ This function converts input to one hot encoding
59
+ """
60
+ one_hot_encoding_next_words = np.zeros((len(input_sequences), total_words), dtype=np.int8)
61
+ for word_index, word in enumerate(next_words):
62
+ one_hot_encoding_next_words[word_index, word] = 1
63
+ return one_hot_encoding_next_words
64
+
65
+
66
+ def create_sets(train_encoded_lyrics_list, test_encoded_lyrics_list, total_words, seq_length, validation_set_size,
67
+ seed):
68
+ """
69
+ This function splits training set to smaller training set and new validation set
70
+ :param train_encoded_lyrics_list: list of sequences in the training set
71
+ :param test_encoded_lyrics_list: list of sequences in the testing set
72
+ :param total_words: total words in the lyrics
73
+ :param seq_length: length of the sequence
74
+ :param validation_set_size: percentage of the validation set
75
+ :param seed: random state for the split
76
+ :return: training/testing/validation set values and labels
77
+ """
78
+ x_train, y_train = _create_sequences(encoded_lyrics_list=train_encoded_lyrics_list,
79
+ total_words=total_words, seq_length=seq_length)
80
+
81
+ x_train, x_val = create_validation_set(data_to_split=x_train,
82
+ val_data_percentage=validation_set_size,
83
+ seed=seed)
84
+ y_train, y_val = create_validation_set(data_to_split=y_train,
85
+ val_data_percentage=validation_set_size,
86
+ seed=seed)
87
+
88
+ x_test, y_test = _create_sequences(encoded_lyrics_list=test_encoded_lyrics_list,
89
+ total_words=total_words, seq_length=seq_length)
90
+
91
+ return {'train': (x_train, y_train), 'validation': (x_val, y_val), 'test': (x_test, y_test)}
92
+
93
+
94
+ def create_validation_set(data_to_split, val_data_percentage, seed):
95
+ """
96
+ This function splits the data into a training set and a validation set
97
+ :param data_to_split: matrix where the rows are the sequences and the columns are the word indices
98
+ :param val_data_percentage: percentage of the validation set
99
+ :param seed: random state for the split; using the same seed for the x and y calls yields the identical shuffle, keeping values and labels aligned
100
+ :return: training and validation set
101
+ """
102
+ np.random.seed(seed=seed)
103
+ np.random.shuffle(data_to_split)
104
+
105
+ validation_ending_index = int(len(data_to_split) * val_data_percentage)
106
+ validation_set = data_to_split[:validation_ending_index]
107
+ data_to_split = data_to_split[validation_ending_index:]
108
+
109
+ return data_to_split, validation_set
110
+
111
+
112
+ print('Loaded Successfully')
readme.md ADDED
@@ -0,0 +1,385 @@
1
+ # The purpose
2
+ A Recurrent Neural Network that can learn song lyrics and their melodies and then, given a melody and a few words to start with, predict the rest of the song. This is essentially done by generating new words for the song while attempting to stay as “close” as possible to the original lyrics. However, closeness is entirely subjective, so the evaluation of the generated words requires imaginative methods. For the training phase itself, we used Cross-Entropy loss.
3
+
4
+ ## Table of Contents
5
+ * [Authors](#authors)
6
+ * [Introduction](#introduction)
7
+ * [Instructions](#instructions)
8
+ * [Dataset Analysis](#dataset-analysis)
9
+ * [Code Design](#code-design)
10
+ * [Melody Feature Integration](#melody-feature-integration)
11
+ * [Architecture](#architecture)
12
+ * [Results Evaluation](#results-evaluation)
13
+ * [Full Experimental Setup](#full-experimental-setup)
14
+ * [Analysis of how the Seed and Melody Affect the Generated Lyrics](#analysis-of-how-the-seed-and-melody-affect-the-generated-lyrics)
15
+
16
+ ## Authors
17
+ * **Tomer Shahar** - [Tomer Shahar](https://github.com/Tomer-Shahar)
18
+ * **Nevo Itzhak** - [Nevo Itzhak](https://github.com/nevoit)
19
+
20
+ ## Introduction
21
+ In this assignment, we were tasked with creating a Recurrent Neural Network that can learn song lyrics and their melodies and then, given a melody and a few words to start with, predict the rest of the song. This is essentially done by generating new words for the song and attempting to be as “close” as possible to the original lyrics. However, closeness is quite subjective, so the evaluation of generated words relies on imaginative methods. For the training phase itself, we used Cross-Entropy loss.
22
+ The melody files and lyrics for each song were given to us and the train / test sets were predefined. 20% of the training data was used as a validation set in order to track our progress between training iterations.
23
+
24
+ We implemented this using an LSTM network. LSTMs have proven in the past to be successful in similar tasks because of their ability to remember previous data, which in our case is relevant because each lyric depends on the words (and melody) that preceded it.
25
+ The network receives as input a sequence of lyrics and predicts the next word to appear. The length of this sequence greatly affects the network’s predicting abilities since 5 words in a row work much better than just a single word. We tried using different values to see how this changes the accuracy of the model. During the training phase, sequences from the actual lyrics are fed into the network to train. After fitting the model, we can generate the lyrics for a whole song by beginning with an initial “seed” which is a sequence of words, predicting a word and then using it to advance the sequence like a moving window.
26
+
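+ To make the generation step concrete, here is a minimal sketch of that moving-window loop for the lyrics-only variant (names such as `model`, `seed_indices` and `generate_lyrics` are illustrative, not the exact identifiers from our code; the melody variant would pass the melody features alongside the window):
+
+ ```python
+ import numpy as np
+
+ def generate_lyrics(model, seed_indices, num_words, seq_length):
+     """Generate lyrics by sliding a window over the last seq_length words."""
+     generated = list(seed_indices)
+     for _ in range(num_words):
+         window = np.array([generated[-seq_length:]])  # shape: (1, seq_length)
+         probs = model.predict(window)[0]              # one probability per vocabulary word
+         # Sampling-based selection: likelihood proportional to predicted probability
+         next_index = np.random.choice(len(probs), p=probs)
+         generated.append(next_index)
+     return generated
+ ```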
27
+ ## Instructions
28
+ 1. Please download the following:
29
+ * A .zip file containing all the MIDI files of the participating songs
30
+ * The .csv file with all the lyrics of the participating songs (600 train and 5 test)
31
+ * [Pretty_Midi](https://nbviewer.jupyter.org/github/craffel/pretty-midi/blob/master/Tutorial.ipynb) , a python library for the analysis of MIDI files
32
+
33
+ 2. Implement a recurrent neural net (LSTM or GRU) to carry out the task described in the introduction.
34
+ * During each step of the training phase, your architecture will receive as input one word of the lyrics. Words are to be represented using the Word2Vec representation that can be found online (300 entries per term, as learned in class).
35
+ * The task of the network is to predict the next word of the song’s lyrics. Please see figure 1 for an illustration. You may use any loss function.
36
+ * In addition to this textual information, you need to include information extracted from the MIDI file. The method for implementing this requirement is entirely up to your consideration. Figure 1 shows one of the more simplistic options – inserting the entire melody representation at each step.
37
+ * Note that your mechanism for selecting the next word should not be deterministic (i.e., always selecting the word with the highest probability) but rather sampling-based. The likelihood of a term being selected by the sampling should be proportional to its probability.
38
+ * You may add whatever additions you want to the architecture (e.g., regularization, attention, teacher forcing)
39
+ * You may create a validation set. The manner of splitting (and all related decisions) are up to you.
40
+
41
+ 3. The Pretty_Midi package offers multiple options for analyzing .mid files.
42
+ Figures 2-4 demonstrate the types of information that can be gathered.
43
+
44
+ 4. You can add whatever other information you consider relevant to further improve the performance of your model.
45
+
46
+ 5. You are to evaluate two approaches for integrating the melody information into your model. The two approaches don’t have to be completely different (one can build upon the other, for example), but please refrain from making only miniature changes.
47
+
48
+ 6. Please include the following information in your report regarding the training phase:
49
+ * The chosen architecture of your model
50
+ * A clear description of your approach(s) for integrating the melody information together with the lyrics
51
+ * TensorBoard graphs showing the training and validation loss of your model.
52
+
53
+ 7. Please include the following information in your report regarding the test phase:
54
+ * For each of the melodies in the test set, produce the outputs (lyrics) for each of the two architectural variants you developed. The input should be the melody and the initial word of the output lyrics. Include all generated lyrics in your submission.
55
+ * For each melody, repeat the process described above three times, with different words (the same words should be used for all melodies).
56
+ * Attempt to analyze the effect of the selection of the first word and/or melody on the generated lyrics.
57
+
58
+ ## Dataset Analysis
59
+ - 600 song lyrics for the training
60
+ - 5 songs for the test set.
61
+ - Midi files for each song containing just the song's melody.
62
+ - Song lyrics features:
63
+ - The length of a song is the number of words in the lyrics that are also present in the word2vec data (see the one-line sketch after this list).
64
+ - For the training set:
65
+ - Minimal song length: 3 words (Perhaps a hip hop song with lots of slang)
66
+ - Maximal song length: 1338
67
+ - Average song length: 257.37
68
+ - For the test set:
69
+ - Minimal song length: 94 words
70
+ - Maximal song length: 389
71
+ - Average song length: 231.6
72
+
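+ For instance, under that definition the length computation is a one-liner (a toy sketch; `word2vec` and `lyrics` are illustrative names):
+
+ ```python
+ word2vec = {'im': [0.1], 'a': [0.2], 'barbie': [0.3], 'girl': [0.4]}  # toy embedding dict
+ lyrics = 'im a barbie girl in a barbie world'
+ song_length = sum(1 for word in lyrics.split() if word in word2vec)
+ print(song_length)  # 6, since 'in' and 'world' are missing from the toy dictionary
+ ```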
73
+ **Input Files:**
74
+ A screenshot of the input folder
75
+
76
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/1.PNG?raw=true)
77
+
78
+ You need to put files in two folders, input_files and midi_files; the other folders are generated automatically.
79
+ Inside input_files put the glove 6B 300d file and the training and testing set:
80
+
81
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/2.PNG?raw=true)
82
+
83
+ An example of the glove file:
84
+
85
+
86
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/3.PNG?raw=true)
87
+
88
+ An example of lyrics_train_set.csv (columns: artist, song name and lyrics):
89
+
90
+
91
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/4.PNG?raw=true)
92
+
93
+ Inside the folder midi_files put the midi files:
94
+
95
+
96
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/5.PNG?raw=true)
97
+
98
+ ## Code Design
99
+
100
+ Our code consists of seven scripts:
101
+
102
+ 1. Experiment.py - the script that runs the experiments to find the optimal parameters for our LSTM network.
103
+ 2. Data_loader.py - Loads the midi files, the lyrics, fixes irregularities and cleans the song file names, loads the word embeddings file, saves and loads the various .pkl files.
104
+ 3. Prepare_data.py - Performs various helper functions on the data such as splitting it properly, creating a validation set and creating the word embeddings matrix.
105
+ 4. Compute_score.py - Because of the nature of this task, it is difficult to judge the success of our model based on classic loss functions such as MSE, so this script contains several different methods to automatically score the output of our model, such as measuring the cosine similarity or the subjectivity of the lyrics. Explained more later.
106
+ 5. Extract_melodies_features.py - Extracts the features we want from the midi files and splits them into train / test / validation. Explained more later.
107
+ 6. Lstm_lyrics.py - The first LSTM model. This one only takes into account the lyrics of the song. This is used for comparison to see the improvement of using melodies.
108
+ 7. Lstm_melodies_lyrics.py - The second LSTM model. This one incorporates the features of the midi files of each song. More on this later.
109
+
110
+ ## Melody Feature Integration
111
+ We devised two different methods to extract features from the melodies. One of them is a more naive technique, and the other is a more sophisticated approach that extends the first method.
112
+
113
+ **Method #1**: Each midi file contains a list of all instruments used in the file. For each instrument, an Instrument object contains a list of all time periods this instrument was used, the pitch used (the note) and velocity (how strong the note was played) as you can see in figure 1.
114
+
115
+
116
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/6.PNG?raw=true)
117
+
118
+ Figure 1: The data available for each instrument of the midi file
119
+
120
+ The midi file contains the length of the melody, and we know the number of words in the lyrics, so we can easily approximate how many seconds each word lasts on average. Based on this, we assign each word a time span and can deduce what instruments were played during that word and how strongly. If a word appears during times 15.2 - 15.8, we can search through the instrument objects for the ones that played during that time frame.
121
+
122
+ Using this data, we can compute how many instruments were used, their average pitch and average velocity per word. This provides the network with some information about the nature of the song during this lyric, i.e. whether the pitch is low or high and how strong the velocity is.
123
+
124
+ In addition, we can easily use the function get_beats() of pretty midi to find all the beat changes in the song and their times. We simply count the number of beat changes during the word’s time frame and thus add another feature for our network.
125
+
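+ A condensed sketch of Method #1 follows (function and variable names are illustrative; only the pretty_midi calls `get_end_time`, `get_beats` and the note attributes are the library's real API):
+
+ ```python
+ import numpy as np
+ import pretty_midi
+
+ def word_level_features(midi_path, num_words):
+     """Approximate per-word instrument count, pitch, velocity and beat features."""
+     pm = pretty_midi.PrettyMIDI(midi_path)
+     seconds_per_word = pm.get_end_time() / num_words
+     beats = pm.get_beats()  # times (in seconds) of every beat
+     features = []
+     for w in range(num_words):
+         start, end = w * seconds_per_word, (w + 1) * seconds_per_word
+         # Instruments with at least one note overlapping this word's time span
+         active = [inst for inst in pm.instruments
+                   if any(n.start < end and n.end > start for n in inst.notes)]
+         notes = [n for inst in active
+                  for n in inst.notes if n.start < end and n.end > start]
+         features.append([
+             len(active),                                             # instruments used
+             np.mean([n.pitch for n in notes]) if notes else 0.0,     # average pitch
+             np.mean([n.velocity for n in notes]) if notes else 0.0,  # average velocity
+             int(np.sum((beats >= start) & (beats < end))),           # beat changes
+         ])
+     return np.array(features)  # shape: (num_words, 4)
+ ```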
126
+ **Method #2**: With the first method we have the average pitch used for each word. Now, we want a more precise measurement of this. Each pretty midi object has a function get_piano_roll(fs) which returns a matrix that represents the notes used in the midi file on a near-continuous time scale (see figure 2). Specifically, it returns an array of size 128\*S where S equals the length of the song (i.e., the time of the last note played) multiplied by how many samples are taken each second, denoted by the parameter fs. E.g., for fs=10 a sample is taken every 1/10th of a second, meaning 10 samples per second, so for a song of 120 seconds we will have 1200 samples. Thus get_piano_roll(fs=10) will return a matrix of size 128x1200. By this method, we can control the granularity of the data with ease.
127
+
128
+
129
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/7.PNG?raw=true)
130
+
131
+ Figure 2: Piano roll matrix. The value in each cell is the velocity summed across instruments.
132
+
133
+ The reason for the 128 is that musical pitch has a possible range of 0 to 127. So each column in this matrix represents the notes played during this sample (in our example, the notes played every 100 milliseconds).
134
+
135
+ After creating this matrix, we can calculate how many piano-roll columns (samples) fall, on average, on each word. For example, if there are 2000 columns and a song has 50 words, it means that each word in the lyrics can be connected to about 40 columns. This is not precise of course, but a useful approximation.
136
+
137
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/8.PNG?raw=true)
138
+
139
+ Figure 3: Notes played during a specific word in a song. Here each lyric received 40 piano-roll columns representing it (columns 10-39 not shown). There are still 128 rows, one for each possible note.
140
+
141
+ We then iterate over every word in the song’s lyrics and find the notes that were played during that particular lyric. For example, in Figure 3 we can see that for a certain word, notes number 57, 64 and 69 were played.
142
+
143
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/9.PNG?raw=true)
144
+ Figure 4: The sum of the notes played during a specific word.
145
+
146
+ Finally, for each lyric-specific matrix, we sum each row to easily see what notes were played and how much. In figure 4, we can see the result of summing the matrix presented in figure 3. This is fed together with the array of word embeddings of each word in the sequence, thus attaching melody features to word features.
147
+
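+ The whole of Method #2 therefore boils down to slicing and summing the piano roll, roughly as follows (names are illustrative; `get_piano_roll` is pretty_midi's real API):
+
+ ```python
+ import numpy as np
+ import pretty_midi
+
+ def piano_roll_word_features(midi_path, num_words, fs=10):
+     """For each word, sum its slice of piano-roll columns into a 128-long vector."""
+     pm = pretty_midi.PrettyMIDI(midi_path)
+     piano_roll = pm.get_piano_roll(fs=fs)       # shape: (128, total samples)
+     cols_per_word = piano_roll.shape[1] // num_words
+     features = []
+     for w in range(num_words):
+         word_cols = piano_roll[:, w * cols_per_word:(w + 1) * cols_per_word]
+         features.append(word_cols.sum(axis=1))  # how much each of the 128 notes was played
+     return np.array(features)                   # shape: (num_words, 128)
+ ```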
148
+ ## Architecture
149
+ We used a fairly standard approach to a bidirectional LSTM network, with the addition of allowing it to receive as input both an embedding vector and the melody features. We also created an LSTM network that doesn’t receive melodies just to study the impact of melody on the results.
150
+
151
+ Number of layers: Both versions receive as input a sequence of lyrics. Then there is an embedding layer after the input that uses the word2vec dictionary to convert each word to the appropriate vector representing it. The difference between the networks is that the one using the melodies has a concatenating layer that appends the vectors of lyrics to the vector of melodies.
152
+
153
+ Additionally, we tried feeding the network various sequence lengths: 1, 5 and 10. We wanted to see how much the sequence length affects the results.
154
+
155
+ In addition to the piano roll matrix, we keep the features extracted in Method #1.
156
+
157
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/10.PNG?raw=true)
158
+
159
+
160
+ - Layers 3 & 4 are only for the model that uses the melody features.
161
+ - Since RNNs have to receive input of fixed length, we use masking to ensure that the input is the same size each time.
162
+ - To utilize the melody features, we simply concatenate all of the features and feed them into the LSTM (see the sketch after this list). However, the features entered vary greatly between our two approaches.
163
+ - We used a relatively high dropout rate of 60% since we don’t want the network to converge too quickly and overfit on the training data. We tried lower values initially and found more success with 60%.
164
+ - The input of the final layer depends on the number of units in the Bidirectional LSTM.
165
+ - The final output is a probability for each word, and we sample one from there according to the distribution.
166
+
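+ As a rough Keras sketch of the melody-aware variant (layer sizes follow the values quoted in this report, but the exact wiring and names are assumptions, and masking is omitted; the lyrics-only variant simply drops the melody input and the concatenation):
+
+ ```python
+ from keras.layers import (Input, Embedding, Concatenate,
+                           Bidirectional, LSTM, Dropout, Dense)
+ from keras.models import Model
+
+ def build_melody_model(total_words, seq_length, melody_dim,
+                        word2vec_matrix, vector_size):
+     lyrics_input = Input(shape=(seq_length,))
+     melody_input = Input(shape=(seq_length, melody_dim))
+     # Embedding layer initialized with the pre-trained word2vec matrix
+     embedded = Embedding(total_words, vector_size,
+                          weights=[word2vec_matrix], trainable=False)(lyrics_input)
+     # Concatenate word vectors with melody features at every timestep
+     merged = Concatenate()([embedded, melody_input])
+     hidden = Bidirectional(LSTM(256))(merged)
+     hidden = Dropout(0.6)(hidden)  # high dropout rate to slow convergence
+     output = Dense(total_words, activation='softmax')(hidden)  # probability per word
+     model = Model(inputs=[lyrics_input, melody_input], outputs=output)
+     model.compile(loss='categorical_crossentropy', optimizer='adam',
+                   metrics=['accuracy'])
+     return model
+ ```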
167
+ Tensorboard Graph:
168
+
169
+
170
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/11.PNG?raw=true)
171
+
172
+ **Stopping criteria:**
173
+ Here we also experimented with several parameters: we used the EarlyStopping callback monitoring the validation loss, with a minimum delta of 0.1 (the minimum change in the monitored quantity to qualify as an improvement; an absolute change smaller than min_delta counts as no improvement) and a patience of 0 (the number of epochs with no improvement after which training is stopped). We experimented with several values and found the most success with these.
174
+
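+ In Keras terms, the stopping criterion described above amounts to:
+
+ ```python
+ from keras.callbacks import EarlyStopping
+
+ early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=0)
+ ```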
175
+ **Network Hyper-Parameters Tuning:**
176
+ NOTE: Here we explain the reasons behind the choices of the parameters.
177
+ After implementing our RNN, we optimized the different parameters used. For some parameters, like the number of units in an LSTM layer, it is very hard to predict what will work best, so an exhaustive search is the most practical way to find good values.
178
+ Each combination takes a long time to train (5-15 minutes):
179
+
180
+ - Learning Rate: We tried different values, ranging from 0.1 to 0.00001. After running numerous experiments, we found 0.00001 to work the best.
181
+ - Epochs: We tried epochs of 5, 10 and 150. We found 10 to work the best.
182
+ - Batch size: We tried 32 and 2048. 32 worked better.
183
+ - Units in LSTM: 64 and 256
184
+ - We tried all of the possible combinations of the parameters detailed above (see the sketch after this list), which meant a huge number of experiments but allowed us to find the optimal settings used in the section below.
185
+
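+ The exhaustive search amounts to enumerating every combination of the grid, roughly as follows (the intermediate learning-rate values are an assumption; the text above only states the range 0.1 to 0.00001):
+
+ ```python
+ from itertools import product
+
+ param_grid = {
+     'learning_rate': [0.1, 0.01, 0.001, 0.0001, 0.00001],  # assumed intermediate steps
+     'epochs': [5, 10, 150],
+     'batch_size': [32, 2048],
+     'lstm_units': [64, 256],
+ }
+
+ configs = [dict(zip(param_grid, values))
+            for values in product(*param_grid.values())]
+ print(f'{len(configs)} configurations to evaluate')  # 5 * 3 * 2 * 2 = 60
+ ```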
186
+ ## Results Evaluation
187
+ In this assignment, we were asked to generate lyrics for the 5 songs in the test set. One way to evaluate the results is simply to count in how many cases our model predicted the word that was actually used in the song. However, this is not a good method to evaluate the model, since if it generated a word that was merely very similar to the original, simple accuracy wouldn’t detect that. Note that we let our model predict exactly the same number of words as in the original song. We devised a few methods to judge our model’s lyrical capabilities:
188
+
189
+ 1. **Cosine Similarity**: this is a general method to compare the similarity of two vectors. So if our model predicted “happy”, and the original lyrics had the word “smile”, we take the vector of each word from the embedding matrix and calculate the cosine similarity, 1 being the best and 0 the worst. There are two variations of this:
190
+    * Comparing each word predicted to the word in the song - the most straightforward method. If a song has 200 words we will perform 200 comparisons according to the index of each word.
191
+    * Creating n-grams of the lyrics, calculating the average vector of each n-gram and then comparing the n-grams according to their order. This method is a bit better in our opinion, since if the model predicted words (“A”, “B”) and they appeared as (“B”, “A”) in the song, an n-gram style similarity will determine that this was a good prediction while a unigram style won’t. So we tried with 1, 2, 3 and 5-grams.
192
+ 2. **Polarity**: Using the TextBlob package, we computed the polarity of the generated lyrics and the original ones. Polarity is a score ranging from -1 to 1, -1 representing a negative sentence and 1 representing a positive one. We checked whether the lyrics carry more or less the same feelings and themes. We present in the results the absolute difference between them, meaning that a polarity difference of 0 means the lyrics have similar sentiments.
193
+ 3. **Subjectivity**: Again drawing from TextBlob, subjectivity is a measure of how subjective a sentence is, 0 being very objective and 1 being very subjective. We calculate the absolute difference between the generated lyrics and the original lyrics.
194
+
195
+ Note: In the final section where we predict song lyrics, we tried different seeds as requested. With a sequence length of S, we take the first S words (i.e., words #1, #2, ..., #S) and predict the rest of the song. We then skip the first S words and take words S+1 until 2S. Then we skip the first 2S words and use words 2S+1 until 3S.
196
+ Example with Sequence Length of 3:
197
+ Seed 1, seed 2 and seed 3 -
198
+
199
+
200
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/12.PNG?raw=true)
201
+
202
+ ## Full Experimental Setup
203
+ Validation Set: Empirically, we learned that using a validation set is better than not using one, even when there isn’t much data. We used the fairly standard 80/20 ratio between training and validation, which worked well.
204
+
205
+ - Batch sizes - 32
206
+ - Epochs - 5
207
+ - Learning rate: 0.01
208
+ - Min delta for improvement: 0.1
209
+ - 256 units in the LSTM layer
210
+
211
+ Additionally, we tried feeding the network various sequence lengths of 1 and 5 to study the effect on the quality of the results.
212
+
213
+ **Experimental Results:**
214
+ The best results are in bold -
215
+
216
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/13.PNG?raw=true)
217
+
218
+ **Analysis**: contrary to our expectations, the model with the simpler features worked better in almost all cases, perhaps due to Occam’s Razor. We theorize that the features about the instruments provided a good abstraction of the features of the entire piano roll.
219
+ However, it is clear that adding some melody features to the model improved it on all parameters (except subjectivity). Additionally, having a sequence length of 5 has mixed results and doesn’t seem to have much of an impact on the evaluation methods we chose. We will look into this manually in the next section.
220
+
221
+ An interesting point is that for all cosine similarity evaluations, an increased n gave a higher similarity. We are not sure why this happens, but we think that with greater values of n the “average” word is more similar. We tested the cosine similarity where n={length of song}, and indeed the similarity was over 0.9. We then tested with a random choice of words and all of the words in a song (i.e., the average vector of the whole song), and the cosine similarity was a staggering 0.75.
222
+
223
+ **Generated Lyrics:**
224
+ For brevity’s sake we’ll only show both models with a sequence length of 1 and the advanced model with a sequence length of 5.
225
+
226
+ **Model with simple melody features - sequence length 1**
227
+ A screenshot from the TensorBoard framework:
228
+
229
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/14.PNG?raw=true)
230
+
231
+ 1. **Lyrics for the bangles - eternal flame**
232
+
233
+ **Seed text: close**
234
+
235
+ **close** feelin dreams baby that cause like friends im have day cool let be their would your wit ignorance such forgiven oh may doll nothing down i now around suddenly ball have empty that beautiful how you lonely no goes gone you are called of for wanted me life of stress apart say i all way, required 55 words
236
+
237
+ **Seed text: your**
238
+
239
+ **your** gentle i were remember how swear she neither too girl out through with more love me me eyes said have i used heartache hmm anymore desire fighting she when stay be part lights spend by bite again say try ruining slide lover i eyes get always honey of maybe to it hope its white i, required 55 words
240
+
241
+ **Seed text: eyes**
242
+
243
+ **eyes** and walk the night woah not live you his world more when just wakes you you fans me to it son sleeping you up i that it da we me let the i longing my do maybe warm fought a believe guys the hear blind dont your through this a down what tell gonna oh, required 55 words
244
+
245
+ 2. **Lyrics for billy joel - honesty**
246
+
247
+ **Seed text: if**
248
+
249
+ **if** do hell your as you hard so the be of mable we love do fat give about em with if show you me its of some can top tell if like over baby an the out that a right get as their leaves are oh come happy joy fight me thief give i goodbye sharing like hey all it last you open right i to tonight wake be shift i sister no i on got years wear to make show dont learn be you the live from outer jump drag the myself face shes raps, required 95 words
250
+
251
+ **Seed text: you**
252
+
253
+ **you** sherry really but take my girl you and its kick knew so or the a tuya love no how love have of the me there the like its if i winter see reason baa i have would want im high him dancin ever but worked wanna the i mean the you when ill say get well leave up just actor that shit now do the chaka over dead got better to no my the imitating me and my can here and itself footsteps to like leave looked are phone for will will keep my mind, required 95 words
254
+
255
+ **Seed text: search**
256
+
257
+ **search** it so class und any you and that friends cried day whoa fine the i three the in the you lovin its a and said hall way others let night hey beautiful dreams dishes save beer store evil back summer yeah forget when well both strong said you me way your the repeat jolly im what told the really to love huh the you baby go river get id and uranus what around with the down and you would always i heart dont with once go land mind come still so to them one else, required 95 words
258
+
259
+ 3. **Lyrics for cardigans - lovefool**
260
+
261
+ **Seed text: dear**
262
+
263
+ **dear** to pick tears slide low live such ill yourself me deep out crazy never kick i the belongs get others shelter before her it i wasnt survive ring off baby im to want life ho hanging if i each high you out mine you won rang woman i the do you we you certain guy the jesus my my much flame to you just you world pretty me to dont fault to ear know see love guide, required 77 words
264
+
265
+ **Seed text: i**
266
+
267
+ **i** dumb look me kit i ive and clothes type meet all of didnt love baby the to you i the baby heart these and up look i out just family the what baby theyre all my love down sittin money be from something stars out no while now your got guide and time some was my you off would you is na man he and remember down hes best in hand be shotgun to leaves the that, required 77 words
268
+
269
+ **Seed text: fear**
270
+
271
+ **fear** at ive no i your be friend kill thats you years im so right your hurts a if love ill night ever feel what his like ride behind love but man a going can good and gone do see if have name all turn the is start the about you down breaking you at the lady did hard call you the about threatening ass thing together in fall love i they its a up drop youre out, required 77 words
272
+
273
+ 4. **Lyrics for aqua - barbie girl**
274
+
275
+ **Seed text: hiya**
276
+
277
+ **hiya** put there to copa out kick when sad when it my cars girl the with in i me the some a around eyes stay cause be clock we never still cant missed anytime motion quiet ive go hot it on the a you had and sign live tennessee no fools got so i father hope for never for you the just it there me my believe other oh red your dont dream the drives the they chorus the happy crosses they to i i because if won this i want didnt ask the, required 93 words
278
+
279
+ **Seed text: barbie**
280
+
281
+ **barbie** me go to country smiling all now from love she my is this world not that in to though i beat your be bad new hard cant pretty to wont to round do things without try it walking of ill things in man love a hands were for well you to no chuckie gonna i wish done arms tell lets it beat waiting found we good man write i nigga at do never you it ooh try are attention yeah oh hurt that too without roll yourself with the you feeling switch dont, required 93 words
282
+
283
+ **Seed text: hi**
284
+
285
+ **hi** this feeling gotta that alone im do she sweet and you ever you the in had the the raise up skies it youre do me its inspired song with what that feel other mine time the easily what when you and three cause beat and its gets christmas your you sad a behind nothing a i number back or never and who your move beat you driving you i love and of do other like on go when oh yea heart plane after her that mine never soul like one you made you, required 93 words
286
+
287
+ 5. **Lyrics for blink 182 - all the small things**
288
+
289
+ **Seed text: all**
290
+
291
+ **all** live ive fire love did my right so truck reading it life its sin heal well two home we confused mony its song you tried could disguise know find for amadeus where sailor you and the to wo insane yeah skin wind ride song me heart up bite a a new a i let money world didnt on, required 58 words
292
+
293
+ **Seed text: the**
294
+
295
+ **the** love want risk whoa breakin take need cebu me amadeus control weve lose and try cryin away know hopes away what theres makes in you right drunk live always ever one bop your lovely on steal bet i say somebody say gonna sad stay frosty a grease scene his hangin your dry touch mind i you you your, required 58 words
296
+
297
+ **Seed text: small**
298
+
299
+ **small** lost with sun find when casbah you time huh to please for you see make the life dont you me to she the waitin honey weed all fill fired wish on alone thats like im the to and yeah long sure the broadway the need somebody always achy dont well i as seen my that boy your that, required 58 words
300
+
301
+ **Model with advanced melody features - sequence length 5**
302
+ A screenshot from the TensorBoard framework:
303
+
304
+ ![](https://github.com/nevoit/Lyric-Generation-Recurrent-Neural-Network/blob/master/figures/15.PNG?raw=true)
305
+
306
+ 1. **Lyrics for the bangles - eternal flame**
307
+
308
+ **Seed text: close your eyes give me**
309
+
310
+ **close your eyes give me** rest dreams want that ill what knows im yeah good trust gonna be those find your anyone temper boys dead oh forever somethin mine have i this would playin total yeah planetary that fast from you two like moon believe you will truth, required 43 words
311
+
312
+ **Seed text: hand darling do you feel**
313
+
314
+ **hand darling do you feel** of for isnt me more of rising midnight got i for cant whiskey i as rock need swear how watched mind thats got too when day love me me or every yeah i dreams forgotten cryin bag mony flesh how no own be, required 43 words
315
+
316
+ **Seed text: My heart beating understand the**
317
+
318
+ **My heart beating understand the** air comin lying through began girl never another thorns bought slim i or go some coming of maybe to it fly so fuck i and thing the ive safe here nothing you long eyes day no but danced you you abandoned me to, required 43 words
319
+
320
+
321
+ 2. **Lyrics for billy joel - honesty**
322
+
323
+ **Seed text: If you search for tenderness**
324
+
325
+ **If you search for tenderness** my radio finger you now i that it ooh we me gonna the i gin my know maybe hair won a better bleed the live fantasy dont your too say a have baby think cant is do warm your tell you hard so the be of ow do love know believin really ever free when up said you me so of us can heartache more up what hot up before the got that a not are tell their di will oh down hands, required 83 words
326
+
327
+ **Seed text: it isnt hard to find**
328
+
329
+ **it isnt hard to find** tired whats me pryor wanna i goodbye sueno what them all it last you since not i to shes joy be america i law like i be they lover catch and right said we stayin be you the nothing life fingers front anyone the myself without try dream cat da but he my thats you and so goin knew so ever the a waters love no from is was of the me right the what its up i wit have watching lied dont, required 83 words
330
+
331
+ **Seed text: Can have the love need**
332
+
333
+ **Can have the love need** was over now im high story shoot give can wire wont the i mean the you no at got are only true want just handed that train youre know the yesterday hot women never alone to love my the believing it and my can need and clover tricks and if true needs will gun for let let much my then it dont note crowd wind you and that save nowhere good whoa alive the i breathe the it the you weve its a, required 83 words
334
+
335
+ 3. **Lyrics for cardigans - lovefool**
336
+
337
+ **Seed text: Dear i fear were facing**
338
+
339
+ **Dear i fear were facing** and every distant cause quite gonna ive them fast dreams wintry song dre my here world summer get music when only follow along every you me cause your the making dub im if such the da to love trying the you up one taking last hes and boss baby would when the see and you been look i from we with shit one taught then, required 65 words
340
+
341
+ **Seed text: A problem you love me**
342
+
343
+ **A problem you love me** down wanna so to did man fool to wrote girls bought wake nothing boys at wait me though got crazy they goin the the drives go quite hiding keep again it i wasnt passed morning boy want oh to now night forgive sick up i fun has you got than you cares pushing wrong i the know you we you silk guy the rainy my, required 65 words
344
+
345
+ **Seed text: No longer know and maybe**
346
+
347
+ **No longer know and maybe** my hes alien to you just you around needed me to dont trouper to bleed know have is dying seemed long me leapin i by a turned probably beautiful all of friend love want the to you i the want as chance and want little i got but plays the if up theyre for my is see fake loves be life hold money got like, required 65 words
348
+
349
+ 4. **Lyrics for aqua - barbie girl**
350
+
351
+ **Seed text: hiya barbie hi ken do**
352
+
353
+ **hiya barbie hi ken do** nobody youre of they dying and come little get my you face find you is rain look need and rock see them till in body be cute to jive the your feel by like i your love friend drag who you heard oh dont take your pieces a up love at ive give take baby really what ring else is but look a off with back and believe do have up yeah stop all gotta the im start the ever you, required 81 words
354
+
355
+ **Seed text: a ride sure jump in**
356
+
357
+ **a ride sure jump in** fly for they for you the just my feel me my better other all making your dont dream the stone the come ba the hands covered come to i i kiss baby cares say i now friend ask the me one to throw asked for this could love heart my is say around he that in to knows i beat your love other another hard there red to turn to sometimes know hey stay am it street of at hey in, required 81 words
358
+
359
+ **Seed text: you want to go for**
360
+
361
+ **you want to go for** have killing you not the dawn than hard id you the ever unopened dimension over old in fall love i come its a want desire this got should feel to mess say goin when alive when it my bar thats the when in i me the us a would or remember ill love limit do time wanna there rhymes sounding rendezvous quiet by one stay it on the a you try and town nothing moonlit like stormy they dont i middle, required 81 words
362
+
363
+ 5. **Lyrics for blink 182 - all the small things**
364
+
365
+ **Seed text: All the small things true**
366
+ **All the small things true** his love a such away its only you to no bread ill i sing deep sun think walk it beat fuck best do back look bone i lot feel know they you it gotta am will shaking was all running that around stay bitch wait with, required 46 words
367
+
368
+ **Seed text: Care truth brings ill take**
369
+
370
+ **Care truth brings ill take** the you feeling push dont say goodbye an that hear im do how sweet and you his you the it new the the traveled want sayin it this do a its wannabe song with if that take other than come the horse if when you and, required 46 words
371
+
372
+ **Seed text: One lift your ride best**
373
+
374
+ **One lift your ride best** breathe make beat and its reason black of you fine a sleep mine a i scene too ever time and why your room beat you sings you i is and in do matter what on one no oh anybody from tu touch think that than they, required 46 words
375
+
376
+
377
+ ## Analysis of how the Seed and Melody Affect the Generated Lyrics
378
+
379
+ We see that the lyrics are mostly unintelligible, and tend to contain words that are very common in the data set (the word “love” appears over 40 times in the generated lyrics, and it is indeed a common lyric in popular songs). It doesn’t appear that the more advanced melody features improved the subjective quality of the lyrics produced, just as our quantitative methods deemed that they don’t improve much. We did notice, however, a peculiar feature: once a word appeared for the first time, it tended to appear many times after (or similar variations of it; e.g., if “i” appeared, then “i”, “me” or “my” tend to appear after it a lot). This is to be expected from a model that maintains a cell state and predicts words based on their embedding.
380
+
381
+ Also it’s apparent that the seed chosen wildly changes the words produced. We think this is because the melody plays a much smaller part in predicting the lyrics compared to the seed, so even with the same melody the dominating factor in producing the lyrics is the seed - see our evaluation table above; the results are slightly better with the melody attached, but not by much, meaning that the first word assists the model much more compared to, say, a baseline of a random word each time.
382
+
383
+ Personally, we don’t see much of an improvement in using 5 words as a seed versus just the first word. Occasionally it leads to better combinations, but it’s usually hit-or-miss. We think this is because of 2 main reasons:
384
+ 1. Many songs contain slang that isn’t in the word embedding matrix, so we cannot learn from those words or predict them.
385
+ 2. Many song lyrics aren’t completely coherent and the words are fairly independent of each other (for a good example, see the original lyrics of the last song in the test set, “All the Small Things” by Blink 182).
rnn.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ This file manages the general RNN architecture
3
+ """
4
+ import os
5
+ import random
6
+
7
+ import numpy as np
8
+ from keras import backend as K
9
+ from keras.callbacks import EarlyStopping, TensorBoard
10
+
11
+ # Environment settings
12
+ IS_COLAB = (os.name == 'posix')
13
+ LOAD_DATA = not IS_COLAB
14
+
15
+ if IS_COLAB:
16
+ from datetime import datetime
17
+ from packaging import version
18
+
19
+ # Define the Keras TensorBoard callback.
20
+ logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
21
+ tensorboard_callback = TensorBoard(log_dir=logdir)
22
+
23
+
24
+ class RecurrentNeuralNetwork(object):
25
+ def __init__(self, seed):
26
+ """
27
+ Seed - The seed used to initialize the weights
28
+ width, height, cells - used for defining the tensors used for the input images
29
+ loss, metrics, optimizer, dropout_rate - settings used for compiling the siamese model (e.g., 'Accuracy' and 'ADAM)
30
+ """
31
+ K.clear_session()
32
+ self.seed = seed
33
+ self.initialize_seed()
34
+ self.model = None
35
+
36
+ def initialize_seed(self):
37
+ """
38
+ Initialize seed all for environment
39
+ """
40
+ os.environ['PYTHONHASHSEED'] = str(self.seed)
41
+ random.seed(self.seed)
42
+ np.random.seed(self.seed)
43
+
44
+ def _load_weights(self, weights_file):
45
+ """
46
+ A function that attempts to load pre-existing weight files for the siamese model. If it succeeds then returns
47
+ True and updates the weights, otherwise False.
48
+ :return True if the file is already exists
49
+ """
50
+ self.model.summary()
51
+ self.load_file = weights_file
52
+ if os.path.exists(weights_file): # if the file is already exists, load and return true
53
+ print('Loading pre-existed weights file')
54
+ self.model.load_weights(weights_file)
55
+ return True
56
+ return False
57
+
58
+ def fit(self, weights_file, batch_size, epochs, patience, min_delta, x_train, y_train, x_val, y_val):
59
+ """
60
+ Function for fitting the model. If the weights already exist, just return the summary of the model. Otherwise,
61
+ perform a whole train/validation/test split and train the model with the given parameters.
62
+ """
63
+ # Create callbacks
64
+ if not self._load_weights(weights_file=weights_file):
65
+ print('No such pre-existed weights file')
66
+ print('Beginning to fit the model')
67
+ if IS_COLAB:
68
+ callbacks = [
69
+ tensorboard_callback,
70
+ EarlyStopping(monitor='val_loss', patience=patience, min_delta=min_delta)
71
+ ]
72
+ else:
73
+ callbacks = [
74
+ EarlyStopping(monitor='val_loss', patience=patience, min_delta=min_delta)
75
+ ]
76
+ self.model.fit(x_train,
77
+ y_train,
78
+ batch_size=batch_size,
79
+ epochs=epochs,
80
+ callbacks=callbacks,
81
+ validation_data=(x_val, y_val))
82
+ self.model.save_weights(self.load_file)
83
+ # evaluate on the validation set
84
+ loss, accuracy = self.model.evaluate(x_val, y_val, batch_size=batch_size)
85
+ print(f'Loss on Validation set: {loss}')
86
+ print(f'Accuracy on Validation set: {accuracy}')
87
+
88
+ def evaluate(self, x_test, y_test, batch_size):
89
+ """
90
+ Function for evaluating the final model after training.
91
+ test_file - file path to the test file.
92
+ batch_size - the batch size used in training.
93
+
94
+ Returns the loss and accuracy results.
95
+ """
96
+ print(f'Available Metrics: {self.model.metrics_names}')
97
+ y_test = np.array(y_test, dtype='float64')
98
+ x_test[0] = np.array(x_test[0], dtype='float64')
99
+ x_test[1] = np.array(x_test[1], dtype='float64')
100
+ # evaluate on the test set
101
+ loss, accuracy = self.model.evaluate(x_test, y_test, batch_size=batch_size)
102
+ return loss, accuracy
103
+
104
+ def predict(self, data):
105
+ return self.model.predict(data)
106
+
107
+
108
+ print('Loaded Successfully')