Spaces:
Running
Running
adjust desc
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import sys
|
|
3 |
import io
|
4 |
import random
|
5 |
import math
|
6 |
-
import time
|
7 |
import requests
|
8 |
import spotipy
|
9 |
import gradio as gr
|
@@ -24,47 +23,57 @@ global_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
|
|
24 |
client_secret=ENV_SPOTIFY_CLIENT_SECRET
|
25 |
))
|
26 |
|
27 |
-
# Simple cache for MusicBrainz responses
|
28 |
-
musicbrainz_cache = {}
|
29 |
-
|
30 |
-
def safe_spotify_call(func, *args, **kwargs):
    """Call a Spotify API function, retrying with exponential backoff on rate limits.

    Args:
        func: Callable to invoke (typically a bound ``spotipy.Spotify`` method).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns on the first successful call.

    Raises:
        spotipy.exceptions.SpotifyException: Re-raised immediately when the
            failure is not a rate-limit error.
        Exception: When every attempt was rate limited; the last Spotify
            error is attached as ``__cause__``.
    """
    max_retries = 5
    delay = 1  # seconds; doubled after each rate-limited attempt
    last_error = None
    for attempt in range(max_retries):
        try:
            return func(*args, **kwargs)
        except spotipy.exceptions.SpotifyException as e:
            # HTTP 429 is the status Spotify uses for rate limiting; the
            # message check is kept so previously-retried errors still retry.
            rate_limited = (
                getattr(e, "http_status", None) == 429
                or "rate" in str(e).lower()
            )
            if not rate_limited:
                raise
            last_error = e
            # Sleeping after the final attempt would only delay the failure.
            if attempt < max_retries - 1:
                time.sleep(delay)
                delay *= 2  # exponential backoff
    raise Exception("Spotify API rate limit reached. Try again later.") from last_error
|
44 |
-
|
45 |
def get_musicbrainz_genre(artist_name):
|
46 |
-
|
47 |
-
if artist_name in musicbrainz_cache:
|
48 |
-
return musicbrainz_cache[artist_name]
|
49 |
-
|
50 |
-
url = "https://musicbrainz.org/ws/2/artist/"
|
51 |
-
params = {"query": artist_name, "fmt": "json"}
|
52 |
headers = {"User-Agent": "SpotifyAnalyzer/1.0 ([email protected])"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
try:
|
54 |
-
response = requests.get(url, params=params
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
genre = sorted_tags[0]["name"]
|
63 |
-
musicbrainz_cache[artist_name] = genre
|
64 |
-
return genre
|
65 |
except Exception:
|
66 |
pass
|
67 |
-
musicbrainz_cache[artist_name] = "Unknown"
|
68 |
return "Unknown"
|
69 |
|
70 |
def extract_playlist_id(url: str) -> str:
|
@@ -80,17 +89,16 @@ def extract_playlist_id(url: str) -> str:
|
|
80 |
def get_playlist_tracks(playlist_id: str, spotify_client) -> list:
|
81 |
tracks = []
|
82 |
try:
|
83 |
-
results =
|
84 |
tracks.extend(results["items"])
|
85 |
while results["next"]:
|
86 |
-
results =
|
87 |
tracks.extend(results["items"])
|
88 |
-
except spotipy.
|
89 |
return []
|
90 |
return tracks
|
91 |
|
92 |
def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_secret: str):
|
93 |
-
# Use provided credentials if available; otherwise, use global_sp.
|
94 |
if spotify_client_id.strip() and spotify_client_secret.strip():
|
95 |
local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
|
96 |
client_id=spotify_client_id.strip(),
|
@@ -107,29 +115,9 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
|
|
107 |
if not tracks:
|
108 |
return ("No tracks found or playlist is private.", None, None, None, None, "")
|
109 |
|
110 |
-
# Collect unique artist IDs for batch fetching (only for artists with an id)
|
111 |
-
unique_artist_ids = {
|
112 |
-
artist_info.get("id")
|
113 |
-
for item in tracks if (track := item.get("track"))
|
114 |
-
for artist_info in track.get("artists", [])
|
115 |
-
if artist_info.get("id")
|
116 |
-
}
|
117 |
-
unique_artist_ids = list(unique_artist_ids)
|
118 |
-
|
119 |
-
# Batch fetch artist details (Spotify API supports up to 50 ids per call)
|
120 |
-
artist_details = {}
|
121 |
-
batch_size = 50
|
122 |
-
for i in range(0, len(unique_artist_ids), batch_size):
|
123 |
-
batch = unique_artist_ids[i:i+batch_size]
|
124 |
-
results = safe_spotify_call(local_sp.artists, batch)
|
125 |
-
for artist in results.get("artists", []):
|
126 |
-
artist_details[artist["id"]] = artist
|
127 |
-
|
128 |
genre_count = {}
|
129 |
-
tracks_table = []
|
130 |
-
# Use a local cache for each artist id's genres to avoid reprocessing.
|
131 |
artist_cache = {}
|
132 |
-
|
133 |
for item in tracks:
|
134 |
track = item.get("track")
|
135 |
if not track:
|
@@ -141,15 +129,27 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
|
|
141 |
artist_info = artists[0]
|
142 |
artist_name = artist_info.get("name", "Unknown Artist")
|
143 |
artist_id = artist_info.get("id")
|
144 |
-
#
|
145 |
-
if artist_id
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
else:
|
148 |
genres = []
|
149 |
-
# If no genres from Spotify, try MusicBrainz (and cache)
|
150 |
-
if not genres:
|
151 |
-
genres = [get_musicbrainz_genre(artist_name)]
|
152 |
-
# Update genre count
|
153 |
if genres:
|
154 |
for g in genres:
|
155 |
genre_count[g] = genre_count.get(g, 0) + 1
|
@@ -166,7 +166,6 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
|
|
166 |
genres_table_data.sort(key=lambda x: x[1], reverse=True)
|
167 |
genres_df = pd.DataFrame(genres_table_data, columns=["Genre", "Count", "Percentage"])
|
168 |
|
169 |
-
# Generate a bar chart for top 15 genres
|
170 |
top15 = genres_df.head(15)
|
171 |
plt.figure(figsize=(10, 6))
|
172 |
plt.bar(top15["Genre"], top15["Count"], color='skyblue')
|
@@ -194,7 +193,7 @@ def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_s
|
|
194 |
"""
|
195 |
tracks_html = table_style + tracks_df.to_html(escape=False, index=False, classes="nice-table")
|
196 |
|
197 |
-
# Prepare state for recommendations:
|
198 |
top_genres = [genre for genre in genres_df["Genre"].head(15).tolist() if genre.lower() != "unknown"]
|
199 |
if not top_genres:
|
200 |
top_genres = ["pop"]
|
@@ -219,12 +218,11 @@ def generate_recommendations(state, local_sp, table_style):
|
|
219 |
recommended_artists = set()
|
220 |
for genre in state["top_genres"]:
|
221 |
try:
|
222 |
-
|
223 |
-
search_result = safe_spotify_call(local_sp.search, q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"])
|
224 |
total = search_result.get("tracks", {}).get("total", 0)
|
225 |
if total > state["rec_per_genre"]:
|
226 |
offset = random.randint(0, min(total - state["rec_per_genre"], 100))
|
227 |
-
search_result =
|
228 |
items = search_result.get("tracks", {}).get("items", [])
|
229 |
for t in items:
|
230 |
track_name = t.get("name", "Unknown Track")
|
@@ -272,30 +270,23 @@ def refresh_recommendations(state):
|
|
272 |
|
273 |
# Main interface description and disclaimer
|
274 |
description_text = (
|
275 |
-
"This agent analyzes a public Spotify playlist (must be user-shared; providing a playlist uploaded by Spotify will result in an error)
|
276 |
-
"a track list (with direct Spotify and YouTube Music search links), and a table of recommended tracks "
|
277 |
-
"based on the top genres found in the playlist. API keys are not stored. "
|
278 |
-
"Use the 'Refresh recommendations' button to get a new set of recommendations."
|
279 |
-
)
|
280 |
-
|
281 |
-
# Added note regarding long processing times due to rate limits.
|
282 |
-
additional_note = (
|
283 |
-
"<br><br><b>Note:</b> If the agent is processing for too long, check the logs. "
|
284 |
-
"If you see a message like 'Your application has reached a rate/request limit', "
|
285 |
-
"it means that the provided Spotify API key has reached its limits. Please generate your own API keys and add them."
|
286 |
)
|
287 |
|
288 |
disclaimer_text = (
|
289 |
-
"<b>Disclaimer:</b> This tool works best for playlists with around 100-200 songs (30-60s). "
|
290 |
-
"
|
291 |
-
"
|
292 |
-
"
|
|
|
293 |
)
|
294 |
|
295 |
with gr.Blocks() as demo:
|
296 |
gr.Markdown("# Spotify Playlist Analyzer & Recommendations + YouTube Music Links")
|
297 |
gr.Markdown(disclaimer_text)
|
298 |
-
gr.Markdown(description_text
|
299 |
|
300 |
with gr.Row():
|
301 |
playlist_url = gr.Textbox(label="Spotify Playlist URL")
|
|
|
3 |
import io
|
4 |
import random
|
5 |
import math
|
|
|
6 |
import requests
|
7 |
import spotipy
|
8 |
import gradio as gr
|
|
|
23 |
client_secret=ENV_SPOTIFY_CLIENT_SECRET
|
24 |
))
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def get_musicbrainz_genre(artist_name, timeout=10):
    """Return the primary MusicBrainz genre for an artist, or ``"Unknown"``.

    The artist is first searched by name. An exact case-insensitive name
    match wins; otherwise the candidate with the highest search score is
    used. The chosen artist is then looked up by MBID, and the first entry
    of its ``genres`` list is returned, falling back to the most-counted
    entry of its ``tags`` list.

    Args:
        artist_name: Artist name to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A genre/tag name, or ``"Unknown"`` when nothing usable was found or
        the lookup failed. This function is best-effort and does not raise
        for network or payload problems.
    """
    search_url = "https://musicbrainz.org/ws/2/artist/"
    # NOTE(review): the contact address looks obfuscated in this copy of the
    # file -- confirm the real value before deploying.
    headers = {"User-Agent": "SpotifyAnalyzer/1.0 ([email protected])"}
    params = {"query": artist_name, "fmt": "json"}
    try:
        search_response = requests.get(
            search_url, params=params, headers=headers, timeout=timeout
        )
        search_data = search_response.json()
        if "artists" not in search_data or not search_data["artists"]:
            return "Unknown"

        wanted = artist_name.lower()
        best_artist = None
        best_score = 0
        for artist in search_data["artists"]:
            # `or ""` guards against an explicit null name in the payload.
            if (artist.get("name") or "").lower() == wanted:
                best_artist = artist
                break
            score = int(artist.get("score", 0))
            if score > best_score:
                best_score = score
                best_artist = artist
        if not best_artist:
            return "Unknown"

        mbid = best_artist.get("id")
        if not mbid:
            return "Unknown"

        lookup_url = f"https://musicbrainz.org/ws/2/artist/{mbid}"
        lookup_params = {"inc": "tags+genres", "fmt": "json"}
        lookup_response = requests.get(
            lookup_url, params=lookup_params, headers=headers, timeout=timeout
        )
        lookup_data = lookup_response.json()

        official_genres = lookup_data.get("genres", [])
        if official_genres:
            return official_genres[0].get("name", "Unknown")

        tags = lookup_data.get("tags", [])
        if tags:
            # max() returns the first of equally-counted tags, matching a
            # stable descending sort followed by [0].
            top_tag = max(tags, key=lambda t: t.get("count", 0))
            return top_tag.get("name", "Unknown")
    except (requests.RequestException, ValueError, TypeError,
            AttributeError, LookupError):
        # Deliberately best-effort: network failures, timeouts, non-JSON
        # bodies and unexpected payload shapes all mean "genre not known".
        pass
    return "Unknown"
|
62 |
+
|
63 |
+
def get_audiodb_genre(artist_name, timeout=10):
    """Return TheAudioDB's genre for an artist, or ``"Unknown"``.

    Args:
        artist_name: Artist name passed as the ``s`` search parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``strGenre`` value of the first matching artist, or ``"Unknown"``
        when the request fails, no artist matches, or the genre is empty.
        This function is best-effort and does not raise for network or
        payload problems.
    """
    url = "https://theaudiodb.com/api/v1/json/1/search.php"
    params = {"s": artist_name}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.ok:
            data = response.json()
            if data and data.get("artists"):
                genre = data["artists"][0].get("strGenre", "")
                if genre:
                    return genre
    except (requests.RequestException, ValueError, TypeError,
            AttributeError, LookupError):
        # Deliberately best-effort: network failures, timeouts, non-JSON
        # bodies and unexpected payload shapes all mean "genre not known".
        pass
    return "Unknown"
|
78 |
|
79 |
def extract_playlist_id(url: str) -> str:
|
|
|
89 |
def get_playlist_tracks(playlist_id: str, spotify_client) -> list:
    """Collect every item of a playlist by following pagination.

    Args:
        playlist_id: Identifier passed to ``spotify_client.playlist_tracks``.
        spotify_client: Object providing ``playlist_tracks(playlist_id)`` and
            ``next(page)``, each returning a mapping with ``"items"`` and
            ``"next"`` keys.

    Returns:
        The concatenated ``"items"`` of all pages, or an empty list when the
        client raises ``spotipy.SpotifyException`` at any point (items from
        pages fetched before the failure are discarded).
    """
    tracks = []
    try:
        results = spotify_client.playlist_tracks(playlist_id)
        tracks.extend(results["items"])
        # A falsy "next" marks the last page.
        while results["next"]:
            results = spotify_client.next(results)
            tracks.extend(results["items"])
    except spotipy.SpotifyException:
        return []
    return tracks
|
100 |
|
101 |
def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_secret: str):
|
|
|
102 |
if spotify_client_id.strip() and spotify_client_secret.strip():
|
103 |
local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
|
104 |
client_id=spotify_client_id.strip(),
|
|
|
115 |
if not tracks:
|
116 |
return ("No tracks found or playlist is private.", None, None, None, None, "")
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
genre_count = {}
|
|
|
|
|
119 |
artist_cache = {}
|
120 |
+
tracks_table = []
|
121 |
for item in tracks:
|
122 |
track = item.get("track")
|
123 |
if not track:
|
|
|
129 |
artist_info = artists[0]
|
130 |
artist_name = artist_info.get("name", "Unknown Artist")
|
131 |
artist_id = artist_info.get("id")
|
132 |
+
# Try Spotify genres; if empty, fallback to MusicBrainz then AudioDB.
|
133 |
+
if artist_id:
|
134 |
+
if artist_id in artist_cache:
|
135 |
+
genres = artist_cache[artist_id]
|
136 |
+
else:
|
137 |
+
try:
|
138 |
+
artist_data = local_sp.artist(artist_id)
|
139 |
+
genres = artist_data.get("genres", [])
|
140 |
+
except spotipy.SpotifyException:
|
141 |
+
genres = []
|
142 |
+
if not genres:
|
143 |
+
mb_genre = get_musicbrainz_genre(artist_name)
|
144 |
+
if mb_genre == "Unknown":
|
145 |
+
audiodb_genre = get_audiodb_genre(artist_name)
|
146 |
+
if audiodb_genre != "Unknown":
|
147 |
+
genres = [audiodb_genre]
|
148 |
+
else:
|
149 |
+
genres = [mb_genre]
|
150 |
+
artist_cache[artist_id] = genres
|
151 |
else:
|
152 |
genres = []
|
|
|
|
|
|
|
|
|
153 |
if genres:
|
154 |
for g in genres:
|
155 |
genre_count[g] = genre_count.get(g, 0) + 1
|
|
|
166 |
genres_table_data.sort(key=lambda x: x[1], reverse=True)
|
167 |
genres_df = pd.DataFrame(genres_table_data, columns=["Genre", "Count", "Percentage"])
|
168 |
|
|
|
169 |
top15 = genres_df.head(15)
|
170 |
plt.figure(figsize=(10, 6))
|
171 |
plt.bar(top15["Genre"], top15["Count"], color='skyblue')
|
|
|
193 |
"""
|
194 |
tracks_html = table_style + tracks_df.to_html(escape=False, index=False, classes="nice-table")
|
195 |
|
196 |
+
# Prepare state for recommendations: exclude "Unknown" genres.
|
197 |
top_genres = [genre for genre in genres_df["Genre"].head(15).tolist() if genre.lower() != "unknown"]
|
198 |
if not top_genres:
|
199 |
top_genres = ["pop"]
|
|
|
218 |
recommended_artists = set()
|
219 |
for genre in state["top_genres"]:
|
220 |
try:
|
221 |
+
search_result = local_sp.search(q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"])
|
|
|
222 |
total = search_result.get("tracks", {}).get("total", 0)
|
223 |
if total > state["rec_per_genre"]:
|
224 |
offset = random.randint(0, min(total - state["rec_per_genre"], 100))
|
225 |
+
search_result = local_sp.search(q=f'genre:"{genre}"', type="track", limit=state["rec_per_genre"], offset=offset)
|
226 |
items = search_result.get("tracks", {}).get("items", [])
|
227 |
for t in items:
|
228 |
track_name = t.get("name", "Unknown Track")
|
|
|
270 |
|
271 |
# Main interface description and disclaimer
|
272 |
# Markdown shown beneath the disclaimer: what the app does and what it outputs.
description_text = (
    "This agent analyzes a public Spotify playlist (must be user-shared; providing a playlist uploaded by Spotify will result in an error) "
    "by generating a genre distribution, a track list (with direct Spotify and YouTube Music search links), and a table of recommended tracks "
    "based on the top genres found in the playlist. API keys are not stored. Use the 'Refresh recommendations' button to get a new set of recommendations."
)

# Markdown/HTML shown first: expected processing time, API-key limits, and
# where to obtain personal Spotify credentials.
disclaimer_text = (
    "<b>Disclaimer:</b> This tool works best for playlists with around 100-200 songs (30-60s). For larger playlists, processing may take multiple minutes. "
    "A default API key is provided, but if you reach the limits, you can supply your own API keys, which you can quickly obtain from "
    "<a href='https://developer.spotify.com/' target='_blank'>Spotify Developer</a>.<br>"
    "Note: If the agent is processing for too long, check the logs. If you see a message like 'Your application has reached a rate/request limit', "
    "it means that the provided Spotify API key has reached its limits. Please generate your own API keys and add them."
)
|
285 |
|
286 |
with gr.Blocks() as demo:
|
287 |
gr.Markdown("# Spotify Playlist Analyzer & Recommendations + YouTube Music Links")
|
288 |
gr.Markdown(disclaimer_text)
|
289 |
+
gr.Markdown(description_text)
|
290 |
|
291 |
with gr.Row():
|
292 |
playlist_url = gr.Textbox(label="Spotify Playlist URL")
|