Spaces:
Sleeping
Sleeping
File size: 11,799 Bytes
725c528 eceea93 68a2379 d3012ab 68a2379 65c83a0 725c528 30059ee 88a2d43 e28d195 35aecae 725c528 02f4def 30059ee eceea93 725c528 bd9a4ce d3012ab 725c528 d3012ab 725c528 d3012ab 725c528 d3012ab eceea93 dd07bd3 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab eceea93 d3012ab 6d5ff05 c517e46 d3012ab eceea93 d3012ab 993d08d c2e19f2 725c528 6d5ff05 48de9e2 6d5ff05 c517e46 48de9e2 d3012ab 7dbbd88 e67968d 274400d e67968d 274400d e67968d bd9a4ce e67968d bd9a4ce 7dbbd88 e67968d 274400d 87dfed9 e67968d 87dfed9 eceea93 d3012ab eceea93 e67968d d3012ab eceea93 48de9e2 e67968d 274400d e67968d 48de9e2 e67968d 48de9e2 725c528 eceea93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
import gradio as gr
import zipfile
import random
# Sizing knobs shared by the rest of the script.
input_count = 300   # how many randomly sampled titles the demo dropdown offers
result_count = 21   # slice bound / default `n` used by the recommenders

# Read both MovieLens tables straight out of the archive; nothing is unpacked to disk.
with zipfile.ZipFile('ml-latest-small.zip') as archive:
    with archive.open('ml-latest-small/movies.csv') as movies_csv:
        movies = pd.read_csv(movies_csv)
    with archive.open('ml-latest-small/ratings.csv') as ratings_csv:
        ratings = pd.read_csv(ratings_csv)
######################################
#
# Content-based Filtering
#
######################################
# TF-IDF vectorizer over the genre text; English stop words such as 'the', 'a' are dropped.
tfidf = TfidfVectorizer(stop_words='english')
# Missing genres become an empty string so the vectorizer never sees NaN.
movies['genres'] = movies['genres'].fillna('')
# One TF-IDF row per movie.
tfidf_matrix = tfidf.fit_transform(movies['genres'])
# TfidfVectorizer L2-normalises rows by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Reverse map: movie title -> row label in `movies` (and row of `cosine_sim`).
# Series.drop_duplicates() compares VALUES (the row labels, which are always
# unique), so it never removed repeated titles. Deduplicate on the index
# labels instead, keeping the first occurrence, so that indices[title] is
# always a single integer rather than a Series.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]
# Function that takes in movie title as input and outputs most similar movies
def get_cb_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies whose genre vectors are most similar to ``title``.

    Args:
        title: Movie title, looked up in the module-level ``indices`` map.
        cosine_sim: Square movie-by-movie similarity matrix whose rows and
            columns follow the row order of ``movies``.

    Returns:
        A list of ``(title, score)`` tuples, best match first, with at most
        ``result_count - 1`` entries. The queried movie itself is never
        included.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of row positions;
    # use the first occurrence instead of indexing the matrix with a Series.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx])
    # Stable descending sort: tied scores keep ascending row order, which is
    # the same tie order sorted(..., reverse=True) produces.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query movie by position rather than assuming it ranks first:
    # any movie with an identical genre vector also scores 1.0 and may sort
    # ahead of it, in which case skipping element 0 would drop the wrong row.
    ranked = ranked[ranked != idx][:max(result_count - 1, 0)]

    return [(movies['title'].iloc[i], scores[i]) for i in ranked]
# Gradio interface
def recommend_movies_cb(movie):
    """Gradio callback: render content-based recommendations as plain text.

    Args:
        movie: Title chosen in the dropdown; may be empty/None when nothing
            is selected.

    Returns:
        A user-facing message when no title is selected or the title is not
        in ``movies``; otherwise a "Score Title" header followed by one
        formatted line per recommendation.
    """
    if not movie:
        return "No movie selected. Please select one from the dropdown."
    # Same guard the collaborative-filtering callbacks use; without it an
    # unknown title surfaces as a KeyError from the title lookup.
    if movie not in movies['title'].values:
        return f"Movie '{movie}' not found in the dataset."
    recommendations = get_cb_recommendations(movie)
    format_string = "{:>5.2f} {:<20}"
    return "Score Title\n" + "\n".join(
        format_string.format(score, title) for title, score in recommendations
    )
######################################
#
# Collaborative Filtering (Item-based)
#
######################################
# Movie-by-user rating table: one row per movieId, one column per userId.
movie_user_matrix = ratings.pivot(index='movieId', columns='userId', values='rating')
# (movie, user) pairs without a rating are stored as 0.
movie_user_matrix = movie_user_matrix.fillna(0)
# Pairwise cosine similarity between the movie rows.
movie_similarity = cosine_similarity(movie_user_matrix)
# Same matrix, labelled by movieId on both axes for lookup by id.
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)
# Function to get movie recommendations using item-based collaborative filtering
def get_cf_recommendations(movie_title, movie_similarity_df=movie_similarity_df, movies=movies, n=result_count):
    """Return the ``n`` movies rated most similarly to ``movie_title``.

    Args:
        movie_title: Title to look up in ``movies['title']``.
        movie_similarity_df: Square similarity table labelled by movieId on
            both axes.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples, most similar first. The list is
        empty when the title is unknown or its movieId has no row in
        ``movie_similarity_df``. The queried movie is never included.

    Raises:
        KeyError: If a recommended movieId has no entry in ``movies``.
    """
    matches = movies.loc[movies['title'] == movie_title, 'movieId']
    if matches.empty:
        # Unknown title: treat like any other "cannot recommend" case.
        return []
    movie_id = matches.iloc[0]

    # Movies that were never rated have no row in the similarity table.
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label instead of assuming it sorts first:
    # other movies can tie with it at the maximum score, and the default
    # sort is not stable.
    similar_scores = movie_similarity_df.loc[movie_id].drop(labels=movie_id)
    top = similar_scores.sort_values(ascending=False, kind='mergesort').head(max(n, 0))

    # Build the id -> title map once rather than scanning `movies` per result;
    # the first row wins if a movieId is repeated.
    title_by_id = movies.drop_duplicates(subset='movieId').set_index('movieId')['title']
    return [(title_by_id.loc[mid], score) for mid, score in top.items()]
# Function for Gradio interface
def recommend_movies_cf(movie_title):
    """Gradio callback: render item-based CF recommendations as plain text.

    Returns a user-facing message when nothing is selected or the title is
    not in ``movies``; otherwise a "Score Title" header followed by one
    formatted line per recommendation.
    """
    if not movie_title:
        return "No movie selected. Please select one from the dropdown."

    known_titles = movies['title'].values
    if movie_title not in known_titles:
        return f"Movie '{movie_title}' not found in the dataset."

    row_template = "{:>5.2f} {:<20}"
    rows = []
    for title, score in get_cf_recommendations(movie_title):
        rows.append(row_template.format(score, title))
    return "Score Title\n" + "\n".join(rows)
######################################
#
# Collaborative Filtering with Neural Network (Item-based)
#
######################################
# Scale the rating table with MinMaxScaler before training; the fitted scaler
# is reused below to map predictions back to the original scale.
scaler = MinMaxScaler()
movie_user_matrix_scaled = scaler.fit_transform(movie_user_matrix)

# Autoencoder: one input per user column, squeezed through a 32-unit bottleneck.
input_dim = movie_user_matrix.shape[1]
encoding_dim = 32
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
# Sigmoid keeps the reconstruction in [0, 1], the range of the scaled input.
decoded = Dense(input_dim, activation='sigmoid')(decoded)
autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the network to reconstruct its own input (each movie row is one sample).
autoencoder.fit(movie_user_matrix_scaled, movie_user_matrix_scaled,
                epochs=50, batch_size=64, shuffle=True, validation_split=0.2,
                verbose=0)

# Reconstruct the full matrix and undo the scaling.
predicted_matrix_scaled = autoencoder.predict(movie_user_matrix_scaled)
predicted_matrix = scaler.inverse_transform(predicted_matrix_scaled)

# Predicted ratings, labelled like the original movie-user matrix.
predicted_matrix_df = pd.DataFrame(predicted_matrix, index=movie_user_matrix.index, columns=movie_user_matrix.columns)

# Movie-to-movie cosine similarity computed on the predicted ratings.
movie_similarity_cfnn = cosine_similarity(predicted_matrix)

# Wrap the NN-based similarities (movie_similarity_cfnn) with movieId labels.
# This previously wrapped `movie_similarity`, the raw collaborative-filtering
# matrix, so the neural-network recommender returned exactly the same
# results as the plain one.
movie_similarity_cfnn_df = pd.DataFrame(movie_similarity_cfnn, index=movie_user_matrix.index, columns=movie_user_matrix.index)
# Function to get movie recommendations using item-based collaborative filtering
def get_cfnn_recommendations(movie_title, movie_similarity_df=movie_similarity_cfnn_df, movies=movies, n=result_count):
    """Return the ``n`` movies most similar to ``movie_title``.

    Similarities are read from ``movie_similarity_df``, which defaults to the
    module-level ``movie_similarity_cfnn_df`` table.

    Args:
        movie_title: Title to look up in ``movies['title']``.
        movie_similarity_df: Square similarity table labelled by movieId on
            both axes.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(title, score)`` tuples, most similar first. The list is
        empty when the title is unknown or its movieId has no row in
        ``movie_similarity_df``. The queried movie is never included.

    Raises:
        KeyError: If a recommended movieId has no entry in ``movies``.
    """
    matches = movies.loc[movies['title'] == movie_title, 'movieId']
    if matches.empty:
        # Unknown title: treat like any other "cannot recommend" case.
        return []
    movie_id = matches.iloc[0]

    # Movies without a row in the similarity table cannot be compared.
    if movie_id not in movie_similarity_df.index:
        return []

    # Remove the query movie by label instead of assuming it sorts first:
    # other movies can tie with it at the maximum score, and the default
    # sort is not stable.
    similar_scores = movie_similarity_df.loc[movie_id].drop(labels=movie_id)
    top = similar_scores.sort_values(ascending=False, kind='mergesort').head(max(n, 0))

    # Build the id -> title map once rather than scanning `movies` per result;
    # the first row wins if a movieId is repeated.
    title_by_id = movies.drop_duplicates(subset='movieId').set_index('movieId')['title']
    return [(title_by_id.loc[mid], score) for mid, score in top.items()]
# Function for Gradio interface
def recommend_movies_cfnn(movie_title):
    """Gradio callback: render get_cfnn_recommendations() output as plain text.

    Returns a user-facing message when nothing is selected or the title is
    not in ``movies``; otherwise a "Score Title" header followed by one
    formatted line per recommendation.
    """
    if not movie_title:
        return "No movie selected. Please select one from the dropdown."

    known_titles = movies['title'].values
    if movie_title not in known_titles:
        return f"Movie '{movie_title}' not found in the dataset."

    row_template = "{:>5.2f} {:<20}"
    rows = []
    for title, score in get_cfnn_recommendations(movie_title):
        rows.append(row_template.format(score, title))
    return "Score Title\n" + "\n".join(rows)
######################################
#
# Gradio interface
#
######################################
# Offer only a random subset of titles in the dropdowns.
total_movies = len(movies)
# random.sample raises ValueError when asked for more items than exist, so
# cap the sample size at the number of available titles.
movie_list = random.sample(movies['title'].tolist(), min(input_count, total_movies))
# All three tabs use the same dropdown label; each tab still needs its own
# Dropdown component instance.
dropdown_label = f"Select a Movie (Total movies: {total_movies}, randomly list {len(movie_list)} for demo purpose.)"

with gr.Blocks() as iface:
    with gr.Tab("Content-Based Filtering"):
        gr.Markdown("""## Movie Recommender - Content-Based Filtering
How it works:
* Use the 'genres' feature of movies, and convert genres into numerical vectors.
* For a given movie, find the most similar movies based on the genre similarity.
* This approach uses genres of movies only, without considering user preferences or viewing history.
* Simple to implement and computationally efficient.
""")
        gr.Interface(fn=recommend_movies_cb,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     # title="Movie Recommender - Content-Based Filtering",
                     description="Select a movie to get recommendations based on content filtering.")
    with gr.Tab("Collaborative Filtering"):
        gr.Markdown("""## Movie Recommender - Item-Based Collaborative Filtering
How it works:
* Create a movie-user matrix where rows represent movies and columns represent users, each cell contains the rating a user gave to a movie, or 0 if no rating exists.
* Calculate the cosine similarity between movies based on their rating patterns, results in a movie-movie similarity matrix.
* For a given movie, find the most similar movies based on this similarity matrix, and recommend these movies.
* Simple to implement and computationally efficient, but doesn't handle sparsity well (when many missing ratings).
""")
        gr.Interface(fn=recommend_movies_cf,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     # title="Movie Recommender - Item-Based Collaborative Filtering",
                     description="Select a movie to get recommendations based on collaborative filtering.")
    with gr.Tab("Collaborative Filtering with Neural Network"):
        gr.Markdown("""## Movie Recommender - Item-Based Collaborative Filtering with Neural Network
How it works:
* Use a Neural Network to predict the missing values in the movie-user matrix to improve the collaborative filtering recommendations.
* The NN model learns to reconstruct the movie-user matrix, effectively predicting missing ratings. This results in a dense, predicted movie-user matrix.
* Calculate movie-movie similarities using the predicted matrix. And use this similarity matrix to find and recommend similar movies.
* This approach often provides more accurate recommendations especially with large sparse datasets. But more complex to implement and require more computational resources.
""")
        gr.Interface(fn=recommend_movies_cfnn,
                     inputs=gr.Dropdown(movie_list, label=dropdown_label),
                     outputs=[gr.Textbox(label="Recommended Movies:")],
                     # title="Movie Recommender - Item-Based Collaborative Filtering",
                     description="Select a movie to get recommendations based on collaborative filtering.")

# Launch the app
iface.launch()