File size: 6,774 Bytes
849c3a4
 
 
 
 
 
 
 
 
 
03e8fb3
 
849c3a4
 
03e8fb3
 
 
 
 
 
 
 
 
849c3a4
 
 
 
 
 
 
 
 
 
 
03e8fb3
bbaa90e
849c3a4
03e8fb3
 
 
849c3a4
 
 
 
 
03e8fb3
849c3a4
 
 
03e8fb3
849c3a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03e8fb3
849c3a4
 
03e8fb3
849c3a4
 
 
 
03e8fb3
 
849c3a4
 
 
 
 
 
 
03e8fb3
849c3a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03e8fb3
 
849c3a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03e8fb3
 
849c3a4
03e8fb3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.sparse import csr_matrix
from rapidfuzz import process, fuzz
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
import logging
import psutil


# Configure logging: set the root logger to INFO so the progress messages
# emitted throughout this module are shown, then record that the module has
# started executing.
logging.basicConfig(level=logging.INFO)
logging.info("Application started")

def log_memory_usage():
    """Log the resident set size (RSS) reported by psutil, in megabytes."""
    # Chain the lookups instead of binding a local called `process`, which
    # would hide the module-level `process` imported from rapidfuzz.
    rss_bytes = psutil.Process().memory_info().rss
    logging.info(f"Memory Usage: {rss_bytes / 1024 ** 2:.2f} MB")

# Spotify API setup: build the module-level client `sp`, authenticated through
# a client-credentials manager. The id and secret are read from the
# `sp_client_id` / `sp_client_secret` environment variables; because they are
# looked up by subscript, a missing variable raises KeyError at import time.
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
    client_id=os.environ['sp_client_id'],
    client_secret=os.environ['sp_client_secret']))

# Columns used for scaling and similarity, in the order the weight sliders
# are created, together with an equal-share default weight for each column.
features = [
    'popularity',
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]
default_weights = [1 / len(features) for _ in features]

# Read and preprocess the data
logging.info("Reading and preprocessing track data")
tracks_data = pd.read_csv('filtered_songs.csv')
# Keep tracks with popularity above 40 and instrumentalness of at most 0.85.
# The index is reset afterwards so that index labels equal row positions:
# the matching code hands back an index label, and the similarity code uses
# that value as a positional row number (matrix row / iloc). Without the
# reset, the gaps left by the filter would make it point at the wrong track
# or run past the end. reset_index also yields a fresh frame rather than a
# filtered slice of the CSV data.
tracks_data = tracks_data[
    (tracks_data['popularity'] > 40) & (tracks_data['instrumentalness'] <= 0.85)
].reset_index(drop=True)
logging.info("Track data loaded and processed")
log_memory_usage()


# Function to fetch a song from Spotify
def get_song_from_spotify(song_name, artist_name=None):
    """Search Spotify for one track and return its metadata and audio features.

    Args:
        song_name: Track title used as the search text.
        artist_name: Optional artist; when truthy, ``artist:<name>`` is
            appended to the search query.

    Returns:
        A dict holding the track's id, name, popularity, duration, explicit
        flag (as an int), comma-joined artist names and audio-feature values,
        or ``None`` when the search has no hits or any exception is raised
        (the exception is logged, not propagated).
    """
    # Audio-feature fields copied verbatim from the audio_features response.
    audio_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )
    try:
        search_query = f"{song_name} artist:{artist_name}" if artist_name else song_name
        logging.info(f"Searching Spotify for: {search_query}")
        results = sp.search(q=search_query, limit=1, type='track')

        if not results['tracks']['items']:
            logging.warning(f"No results found on Spotify for: {search_query}")
            return None

        track = results['tracks']['items'][0]
        logging.info(f"Found track on Spotify: {track['name']} by {', '.join(artist['name'] for artist in track['artists'])}")
        audio_features = sp.audio_features(track['id'])[0]

        # Track-level fields first, then the audio features, so the key
        # order of the result is stable.
        song_details = {
            'id': track['id'],
            'name': track['name'],
            'popularity': track['popularity'],
            'duration_ms': track['duration_ms'],
            'explicit': int(track['explicit']),
            'artists': ', '.join(artist['name'] for artist in track['artists']),
        }
        for key in audio_keys:
            song_details[key] = audio_features[key]
        return song_details
    except Exception as e:
        logging.error(f"Error fetching song from Spotify: {e}")
        return None

# Enhanced Fuzzy Matching Function
def enhanced_fuzzy_matching(song_name, artist_name, df):
    """Find the row of ``df`` whose "name artists" text best matches the query.

    The query is ``"<song_name> <artist_name>"`` and candidates are scored
    with ``fuzz.token_sort_ratio``.

    Args:
        song_name: Song title typed by the user.
        artist_name: Artist typed by the user; may be empty or ``None``.
        df: DataFrame with ``name`` and ``artists`` columns. It is read only;
            no helper column is added to it.

    Returns:
        The 0-based row position of the best match (usable with ``iloc`` and
        as a matrix row number), or ``None`` when the query is empty, ``df``
        has no rows, or no candidate could be scored.
    """
    logging.info(f"Performing fuzzy matching for: {song_name}, {artist_name}")
    # Treat a missing part as empty text so None never becomes the word "None".
    combined_query = f"{song_name or ''} {artist_name or ''}".strip()
    if not combined_query or df.empty:
        return None

    # Build the candidates locally instead of writing a 'combined' column
    # into the caller's (shared) DataFrame.
    combined = df['name'] + ' ' + df['artists']
    # A plain list makes the key returned by extractOne the list position,
    # independent of df's index labels. Non-string entries (e.g. NaN from
    # missing values) become None, which rapidfuzz skips.
    choices = [text if isinstance(text, str) else None for text in combined]
    match = process.extractOne(combined_query, choices, scorer=fuzz.token_sort_ratio)
    return match[2] if match else None

# Function to apply the selected scaler and calculate weighted cosine similarity
def calculate_weighted_cosine_similarity(input_song_index, weights, num_songs_to_output, tracks_data, scaler_choice):
    """Rank tracks by weighted cosine similarity to one query track.

    Args:
        input_song_index: 0-based row position of the query track in
            ``tracks_data``.
        weights: One multiplier per entry of ``features``, applied to the
            scaled columns.
        num_songs_to_output: Maximum number of similar tracks to return;
            negative values are treated as 0.
        tracks_data: DataFrame containing every column listed in ``features``.
        scaler_choice: ``'Standard Scaler'`` selects ``StandardScaler``; any
            other value selects ``MinMaxScaler``.

    Returns:
        NumPy array of row positions ordered from most to least similar. The
        query track itself is never included.
    """
    logging.info("Calculating weighted cosine similarity")
    # Apply the selected scaler
    scaler = StandardScaler() if scaler_choice == 'Standard Scaler' else MinMaxScaler()
    scaled_features = scaler.fit_transform(tracks_data[features]) * weights
    tracks_sparse = csr_matrix(scaled_features)

    # Calculate cosine similarities
    cosine_similarities = cosine_similarity(tracks_sparse[input_song_index], tracks_sparse).flatten()

    # Stable sort keeps tied tracks in row order, making results reproducible.
    ranked = np.argsort(-cosine_similarities, kind='stable')
    # Drop the query track explicitly rather than assuming it sorts first:
    # duplicates, ties or an all-zero query row can put another track at rank 0.
    ranked = ranked[ranked != input_song_index]
    return ranked[:max(int(num_songs_to_output), 0)]


# Function to recommend songs
def recommend_songs_interface(song_name, artist_name, num_songs_to_output, scaler_choice, tracks_data, *input_weights):
    """Return the tracks most similar to the song the user asked for.

    Args:
        song_name: Song title typed by the user.
        artist_name: Artist typed by the user (may be empty).
        num_songs_to_output: Number of recommendations wanted; converted
            with ``int``.
        scaler_choice: Scaler label passed through to
            ``calculate_weighted_cosine_similarity``.
        tracks_data: DataFrame of candidate tracks.
        *input_weights: One weight per entry of ``features``. When omitted,
            ``default_weights`` is used.

    Returns:
        DataFrame with ``name`` and ``artists`` columns for the recommended
        tracks; empty (same columns) when no matching song is found.
    """
    num_songs_to_output = int(num_songs_to_output)

    # Always build a fresh float array: default_weights is a plain list, which
    # does not support in-place division, and it must not be modified anyway.
    weights = np.array(input_weights if input_weights else default_weights, dtype=float)
    total = weights.sum()
    if total > 0:
        weights = weights / total  # Normalize weights
    else:
        # All sliders at zero: dividing would produce NaN and break the
        # similarity computation, so fall back to equal weighting.
        weights = np.full(weights.shape, 1 / weights.size)

    song_index = enhanced_fuzzy_matching(song_name, artist_name, tracks_data)
    if song_index is None:
        return pd.DataFrame(columns=['name', 'artists'])

    similar_indices = calculate_weighted_cosine_similarity(song_index, weights, num_songs_to_output, tracks_data, scaler_choice)
    return tracks_data.iloc[similar_indices][['name', 'artists']]

# Gradio interface setup
logging.info("Setting up Gradio interface")
description = "Enter a song name and artist name (optional) to get song recommendations. Adjust the feature weights using the sliders. The system will automatically normalize the weights."

# Input components, in the order their values are passed to the callback:
# song name, artist name, result count, scaler choice, then one weight
# slider per entry of `features`.
inputs = [
    gr.components.Textbox(label="Song Name", placeholder="Enter a song name..."),
    gr.components.Textbox(label="Artist Name (optional)", placeholder="Enter artist name (if known)..."),
    gr.components.Number(label="Number of Songs to Output", value=5),
    gr.components.Dropdown(choices=["Standard Scaler", "MinMax Scaler"], label="Select Scaler", value="Standard Scaler"),
    *(
        gr.components.Slider(minimum=0, maximum=1, value=1/len(features), label=f"Weight for {feature}")
        for feature in features
    ),
]

# The callback forwards the UI values and injects the module-level
# `tracks_data` as the dataset to search.
iface = gr.Interface(
    fn=lambda song_name, artist_name, num_songs_to_output, scaler_choice, *input_weights: recommend_songs_interface(
        song_name,
        artist_name,
        num_songs_to_output,
        scaler_choice,
        tracks_data,
        *input_weights,
    ),
    inputs=inputs,
    outputs=gr.components.Dataframe(),
    title="Song Recommender",
    description=description,
)

# Run the Gradio app when this file is executed as a script. The interface
# object is already built at import time, so only the launch is logged here.
if __name__ == "__main__":
    logging.info("Launching Gradio interface")
    iface.launch()
    logging.info("Application finished")