spotify-genre-analyzer

Running

App Files Files Community

plozia committed on Feb 15

Commit

1007d5f

verified ·

1 Parent(s): 13367cb

adjust desc

Browse files

Files changed (1) hide show

app.py +81 -90

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import sys
 import io
 import random
 import math
-import time
 import requests
 import spotipy
 import gradio as gr
@@ -24,47 +23,57 @@ global_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
     client_secret=ENV_SPOTIFY_CLIENT_SECRET
 ))
-# Simple cache for MusicBrainz responses
-musicbrainz_cache = {}
-def safe_spotify_call(func, *args, **kwargs):
-    """Wrapper for Spotify API calls with exponential backoff."""
-    max_retries = 5
-    delay = 1  # start delay in seconds
-    for attempt in range(max_retries):
-        try:
-            return func(*args, **kwargs)
-        except spotipy.exceptions.SpotifyException as e:
-            if "rate" in str(e).lower():
-                time.sleep(delay)
-                delay *= 2  # exponential backoff
-            else:
-                raise
-    raise Exception("Spotify API rate limit reached. Try again later.")
 def get_musicbrainz_genre(artist_name):
-    # Use cached result if available
-    if artist_name in musicbrainz_cache:
-        return musicbrainz_cache[artist_name]
-    url = "https://musicbrainz.org/ws/2/artist/"
-    params = {"query": artist_name, "fmt": "json"}
     headers = {"User-Agent": "SpotifyAnalyzer/1.0 ([email protected])"}
     try:
-        response = requests.get(url, params=params, headers=headers)
-        data = response.json()
-        if "artists" in data and data["artists"]:
-            first_artist = data["artists"][0]
-            tags = first_artist.get("tags", [])
-            if tags:
-                # Return the tag with the highest count
-                sorted_tags = sorted(tags, key=lambda t: t.get("count", 0), reverse=True)
-                genre = sorted_tags[0]["name"]
-                musicbrainz_cache[artist_name] = genre
-                return genre
     except Exception:
         pass
-    musicbrainz_cache[artist_name] = "Unknown"
     return "Unknown"
 def extract_playlist_id(url: str) -> str:
@@ -80,17 +89,16 @@ def extract_playlist_id(url: str) -> str:
 def get_playlist_tracks(playlist_id: str, spotify_client) -> list:
     tracks = []
     try:
-        results = safe_spotify_call(spotify_client.playlist_tracks, playlist_id)
         tracks.extend(results["items"])
         while results["next"]:
-            results = safe_spotify_call(spotify_client.next, results)
             tracks.extend(results["items"])
-    except spotipy.exceptions.SpotifyException:
         return []
     return tracks
 def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_secret: str):
-    # Use provided credentials if available; otherwise, use global_sp.
     if spotify_client_id.strip() and spotify_client_secret.strip():
         local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
             client_id=spotify_client_id.strip(),
@@ -107,29 +115,9 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
     if not tracks:
         return ("No tracks found or playlist is private.", None, None, None, None, "")
-    # Collect unique artist IDs for batch fetching (only for artists with an id)
-    unique_artist_ids = {
-        artist_info.get("id")
-        for item in tracks if (track := item.get("track"))
-        for artist_info in track.get("artists", [])
-        if artist_info.get("id")
-    }
-    unique_artist_ids = list(unique_artist_ids)
-    # Batch fetch artist details (Spotify API supports up to 50 ids per call)
-    artist_details = {}
-    batch_size = 50
-    for i in range(0, len(unique_artist_ids), batch_size):
-        batch = unique_artist_ids[i:i+batch_size]
-        results = safe_spotify_call(local_sp.artists, batch)
-        for artist in results.get("artists", []):
-            artist_details[artist["id"]] = artist
     genre_count = {}
-    tracks_table = []
-    # Use a local cache for each artist id's genres to avoid reprocessing.
     artist_cache = {}
     for item in tracks:
         track = item.get("track")
         if not track:
@@ -141,15 +129,27 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
         artist_info = artists[0]
         artist_name = artist_info.get("name", "Unknown Artist")
         artist_id = artist_info.get("id")
-        # Use batch-fetched details if available
-        if artist_id and artist_id in artist_details:
-            genres = artist_details[artist_id].get("genres", [])
         else:
             genres = []
-        # If no genres from Spotify, try MusicBrainz (and cache)
-        if not genres:
-            genres = [get_musicbrainz_genre(artist_name)]
-        # Update genre count
         if genres:
             for g in genres:
                 genre_count[g] = genre_count.get(g, 0) + 1
@@ -166,7 +166,6 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
     genres_table_data.sort(key=lambda x: x[1], reverse=True)
     genres_df = pd.DataFrame(genres_table_data, columns=["Genre", "Count", "Percentage"])
-    # Generate a bar chart for top 15 genres
     top15 = genres_df.head(15)
     plt.figure(figsize=(10, 6))
     plt.bar(top15["Genre"], top15["Count"], color='skyblue')
@@ -194,7 +193,7 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
     """
     tracks_html = table_style + tracks_df.to_html(escape=False, index=False, classes="nice-table")
-    # Prepare state for recommendations:
     top_genres = [genre for genre in genres_df["Genre"].head(15).tolist() if genre.lower() != "unknown"]
     if not top_genres:
         top_genres = ["pop"]
@@ -219,12 +218,11 @@ def generate_recommendations(state, local_sp, table_style):
     recommended_artists = set()
     for genre in state["top_genres"]:
         try:
-            # Initial search call
-            search_result = safe_spotify_call(local_sp.search, q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"])
             total = search_result.get("tracks", {}).get("total", 0)
             if total > state["rec_per_genre"]:
                 offset = random.randint(0, min(total - state["rec_per_genre"], 100))
-                search_result = safe_spotify_call(local_sp.search, q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"], offset=offset)
             items = search_result.get("tracks", {}).get("items", [])
             for t in items:
                 track_name = t.get("name", "Unknown Track")
@@ -272,30 +270,23 @@ def refresh_recommendations(state):
 # Main interface description and disclaimer
 description_text = (
-    "This agent analyzes a public Spotify playlist (must be user-shared; providing a playlist uploaded by Spotify will result in an error) by generating a genre distribution, "
-    "a track list (with direct Spotify and YouTube Music search links), and a table of recommended tracks "
-    "based on the top genres found in the playlist. API keys are not stored. "
-    "Use the 'Refresh recommendations' button to get a new set of recommendations."
-)
-# Added note regarding long processing times due to rate limits.
-additional_note = (
-    "<br><br><b>Note:</b> If the agent is processing for too long, check the logs. "
-    "If you see a message like 'Your application has reached a rate/request limit', "
-    "it means that the provided Spotify API key has reached its limits. Please generate your own API keys and add them."
 )
 disclaimer_text = (
-    "<b>Disclaimer:</b> This tool works best for playlists with around 100-200 songs (30-60s). "
-    "For larger playlists, processing may take multiple minutes. A default API key is provided, but if you reach "
-    "the limits, you can supply your own API keys, which you can quickly obtain from "
-    "<a href='https://developer.spotify.com/' target='_blank'>Spotify Developer</a>."
 )
 with gr.Blocks() as demo:
     gr.Markdown("# Spotify Playlist Analyzer & Recommendations + YouTube Music Links")
     gr.Markdown(disclaimer_text)
-    gr.Markdown(description_text + additional_note)
     with gr.Row():
         playlist_url = gr.Textbox(label="Spotify Playlist URL")

 import io
 import random
 import math
 import requests
 import spotipy
 import gradio as gr
     client_secret=ENV_SPOTIFY_CLIENT_SECRET
 ))
 def get_musicbrainz_genre(artist_name):
     """Look up a single genre name for an artist on MusicBrainz.

     Searches the MusicBrainz artist index for ``artist_name`` and picks the
     candidate whose name matches case-insensitively; if none matches, the
     candidate with the highest positive search ``score`` is used. That
     artist is then looked up by its MusicBrainz id with ``inc=tags+genres``.

     Returns the name of the first entry in the lookup's ``genres`` list if
     there is one, otherwise the name of the tag with the highest ``count``,
     otherwise ``"Unknown"``. Network, HTTP and parsing failures also yield
     ``"Unknown"``: the lookup is best-effort and never raises for them.
     """
     search_url = "https://musicbrainz.org/ws/2/artist/"
     headers = {"User-Agent": "SpotifyAnalyzer/1.0 ([email protected])"}
     params = {"query": artist_name, "fmt": "json"}
     # requests applies no timeout unless asked to; without one a stalled
     # connection would block the caller indefinitely.
     timeout_seconds = 10
     try:
         search_response = requests.get(
             search_url, params=params, headers=headers, timeout=timeout_seconds
         )
         search_response.raise_for_status()
         candidates = search_response.json().get("artists") or []

         wanted_name = artist_name.lower()
         best_artist = None
         best_score = 0
         for artist in candidates:
             name = artist.get("name", "")
             score = int(artist.get("score", 0))
             if name.lower() == wanted_name:
                 # An exact (case-insensitive) name match beats any score.
                 best_artist = artist
                 break
             if score > best_score:
                 best_score = score
                 best_artist = artist

         mbid = best_artist.get("id") if best_artist else None
         if not mbid:
             return "Unknown"

         lookup_response = requests.get(
             f"https://musicbrainz.org/ws/2/artist/{mbid}",
             params={"inc": "tags+genres", "fmt": "json"},
             headers=headers,
             timeout=timeout_seconds,
         )
         lookup_response.raise_for_status()
         lookup_data = lookup_response.json()

         official_genres = lookup_data.get("genres", [])
         if official_genres:
             return official_genres[0].get("name", "Unknown")

         tags = lookup_data.get("tags", [])
         if tags:
             # max() keeps the first of equally-counted tags, like a stable
             # descending sort followed by taking element 0.
             top_tag = max(tags, key=lambda t: t.get("count", 0))
             return top_tag.get("name", "Unknown")
     except (requests.RequestException, ValueError, TypeError, AttributeError, KeyError):
         # Deliberate best-effort: a failed or malformed MusicBrainz response
         # just means no genre from this source.
         return "Unknown"
     return "Unknown"
def get_audiodb_genre(artist_name):
    """Look up an artist's genre on TheAudioDB.

    Calls the TheAudioDB ``search.php`` endpoint with ``artist_name`` and
    returns the ``strGenre`` field of the first artist in the response.

    Returns ``"Unknown"`` when the HTTP response is not OK, when no artist
    is returned, when the genre field is empty, or when the request or the
    parsing of its response fails. The lookup is best-effort and never
    raises for those failures.
    """
    url = "https://theaudiodb.com/api/v1/json/1/search.php"
    params = {"s": artist_name}
    try:
        # requests applies no timeout unless asked to; without one a stalled
        # connection would block the caller indefinitely.
        response = requests.get(url, params=params, timeout=10)
        if not response.ok:
            return "Unknown"
        data = response.json()
        if data and data.get("artists"):
            genre = data["artists"][0].get("strGenre", "")
            if genre:
                return genre
    except (requests.RequestException, ValueError, TypeError, AttributeError, KeyError):
        # Deliberate best-effort: a failed or malformed response just means
        # no genre from this source.
        return "Unknown"
    return "Unknown"
 def extract_playlist_id(url: str) -> str:
 def get_playlist_tracks(playlist_id: str, spotify_client) -> list:
     """Collect every item of a playlist by following its result pages.

     Fetches the first page with ``spotify_client.playlist_tracks`` and keeps
     calling ``spotify_client.next`` while the current page has a truthy
     ``"next"`` entry, accumulating each page's ``"items"``.

     Returns the accumulated items, or an empty list if any call raises
     ``spotipy.SpotifyException`` (items gathered so far are discarded).
     """
     collected = []
     try:
         page = spotify_client.playlist_tracks(playlist_id)
         while True:
             collected += page["items"]
             if not page["next"]:
                 break
             page = spotify_client.next(page)
     except spotipy.SpotifyException:
         return []
     return collected
 def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_secret: str):
     if spotify_client_id.strip() and spotify_client_secret.strip():
         local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
             client_id=spotify_client_id.strip(),
     if not tracks:
         return ("No tracks found or playlist is private.", None, None, None, None, "")
     genre_count = {}
     artist_cache = {}
+    tracks_table = []
     for item in tracks:
         track = item.get("track")
         if not track:
         artist_info = artists[0]
         artist_name = artist_info.get("name", "Unknown Artist")
         artist_id = artist_info.get("id")
+        # Try Spotify genres; if empty, fallback to MusicBrainz then AudioDB.
+        if artist_id:
+            if artist_id in artist_cache:
+                genres = artist_cache[artist_id]
+            else:
+                try:
+                    artist_data = local_sp.artist(artist_id)
+                    genres = artist_data.get("genres", [])
+                except spotipy.SpotifyException:
+                    genres = []
+                if not genres:
+                    mb_genre = get_musicbrainz_genre(artist_name)
+                    if mb_genre == "Unknown":
+                        audiodb_genre = get_audiodb_genre(artist_name)
+                        if audiodb_genre != "Unknown":
+                            genres = [audiodb_genre]
+                    else:
+                        genres = [mb_genre]
+                artist_cache[artist_id] = genres
         else:
             genres = []
         if genres:
             for g in genres:
                 genre_count[g] = genre_count.get(g, 0) + 1
     genres_table_data.sort(key=lambda x: x[1], reverse=True)
     genres_df = pd.DataFrame(genres_table_data, columns=["Genre", "Count", "Percentage"])
     top15 = genres_df.head(15)
     plt.figure(figsize=(10, 6))
     plt.bar(top15["Genre"], top15["Count"], color='skyblue')
     """
     tracks_html = table_style + tracks_df.to_html(escape=False, index=False, classes="nice-table")
+    # Prepare state for recommendations: exclude "Unknown" genres.
     top_genres = [genre for genre in genres_df["Genre"].head(15).tolist() if genre.lower() != "unknown"]
     if not top_genres:
         top_genres = ["pop"]
     recommended_artists = set()
     for genre in state["top_genres"]:
         try:
+            search_result = local_sp.search(q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"])
             total = search_result.get("tracks", {}).get("total", 0)
             if total > state["rec_per_genre"]:
                 offset = random.randint(0, min(total - state["rec_per_genre"], 100))
+                search_result = local_sp.search(q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"], offset=offset)
             items = search_result.get("tracks", {}).get("items", [])
             for t in items:
                 track_name = t.get("name", "Unknown Track")
 # Main interface description and disclaimer
 description_text = (
+    "This agent analyzes a public Spotify playlist (must be user-shared; providing a playlist uploaded by Spotify will result in an error) "
+    "by generating a genre distribution, a track list (with direct Spotify and YouTube Music search links), and a table of recommended tracks "
+    "based on the top genres found in the playlist. API keys are not stored. Use the 'Refresh recommendations' button to get a new set of recommendations."
 )
 disclaimer_text = (
+    "<b>Disclaimer:</b> This tool works best for playlists with around 100-200 songs (30-60s). For larger playlists, processing may take multiple minutes. "
+    "A default API key is provided, but if you reach the limits, you can supply your own API keys, which you can quickly obtain from "
+    "<a href='https://developer.spotify.com/' target='_blank'>Spotify Developer</a>.<br>"
+    "Note: If the agent is processing for too long, check the logs. If you see a message like 'Your application has reached a rate/request limit', "
+    "it means that the provided Spotify API key has reached its limits. Please generate your own API keys and add them."
 )
 with gr.Blocks() as demo:
     gr.Markdown("# Spotify Playlist Analyzer & Recommendations + YouTube Music Links")
     gr.Markdown(disclaimer_text)
+    gr.Markdown(description_text)
     with gr.Row():
         playlist_url = gr.Textbox(label="Spotify Playlist URL")