Update app.py
Browse files
app.py
CHANGED
@@ -5,31 +5,28 @@ from surprise.model_selection import train_test_split
|
|
5 |
from collections import defaultdict
|
6 |
import kagglehub
|
7 |
|
8 |
-
# Step 1: Download the latest version of the dataset
|
9 |
-
|
|
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
|
14 |
-
# Step
|
15 |
-
dataset_path = f"{dataset_folder}/imdb_movies.csv"
|
16 |
-
|
17 |
-
# Load the dataset from the CSV located at dataset_path.
df = pd.read_csv(dataset_path)

# Cast every categorical column to str in one pass so later string
# operations and equality filters behave uniformly.
categorical_columns = ['genre', 'orig_title', 'orig_lang', 'country', 'crew']
df = df.astype({name: str for name in categorical_columns})

# If every genre value is purely numeric, replace the codes with
# "Genre_<code>" labels.
genre_values = df['genre']
if genre_values.str.isnumeric().all():
    print("Warning: Genre column is numeric. Mapping needed.")
    genre_mapping = {code: f"Genre_{code}" for code in genre_values.unique()}
    df['genre'] = genre_values.map(genre_mapping)
|
31 |
|
32 |
-
# Prepare the dataset for Surprise: the rating scale spans the observed
# range of the 'score' column.
score_column = df['score']
reader = Reader(rating_scale=(score_column.min(), score_column.max()))
# NOTE(review): Surprise reads the three columns as (user id, item id, rating)
# in that order, so 'orig_title' is treated as the user and 'orig_lang' as the
# item here - confirm this is intended.
data = Dataset.load_from_df(df[['orig_title', 'orig_lang', 'score']], reader)
|
35 |
|
@@ -38,45 +35,40 @@ trainset, testset = train_test_split(data, test_size=0.2, random_state=42)
|
|
38 |
# Fixed hyperparameters: 50 latent factors and a pinned seed for repeatable fits.
svd_params = {"n_factors": 50, "random_state": 42}
model = SVD(**svd_params)
model.fit(trainset)
|
40 |
|
41 |
-
#
|
42 |
def get_recommendations(selected_movies, genre, top_n=5):
    """Return the best-scoring titles in *genre* that were not already selected.

    Every distinct ``orig_title`` in the module-level ``df`` whose ``genre``
    equals *genre* is scored with ``model.predict`` and the highest estimates
    are returned, best first.

    Args:
        selected_movies: Titles the user already picked; they are excluded
            from the result.
        genre: Value compared for equality against the ``genre`` column.
        top_n: Maximum number of titles to return. Defaults to 5, the
            previously hard-coded limit.

    Returns:
        A list of at most *top_n* titles, or a one-element list holding a
        prompt message when *selected_movies* is empty.
    """
    if not selected_movies:
        return ["Please select at least one movie."]

    # Set membership keeps the exclusion test O(1) per candidate title.
    already_selected = set(selected_movies)
    candidate_titles = df.loc[df['genre'] == genre, 'orig_title'].unique()

    # NOTE(review): every prediction uses the fixed uid 'user', so
    # selected_movies only filters the output and does not personalise the
    # ranking - confirm this is intended.
    scored_titles = [
        (title, model.predict(uid='user', iid=title).est)
        for title in candidate_titles
        if title not in already_selected
    ]

    # list.sort() is stable, so equally scored titles keep their dataset order.
    scored_titles.sort(key=lambda pair: pair[1], reverse=True)
    return [title for title, _ in scored_titles[:max(top_n, 0)]]
|
|
|
|
|
|
|
64 |
|
65 |
-
# Streamlit UI
|
66 |
st.title("🎬 Movie Recommendation System")
|
67 |
|
68 |
# Genre selection
|
69 |
-
|
70 |
-
|
71 |
-
# Get available movies for the selected genre
|
72 |
-
movies_in_genre = df[df['genre'] == selected_genre]['orig_title'].unique().tolist()
|
73 |
|
74 |
-
# Movie selection
|
75 |
-
|
|
|
76 |
|
77 |
# Recommendation button
|
78 |
if st.button("Get Recommendations"):
|
79 |
recommendations = get_recommendations(selected_movies, selected_genre)
|
80 |
-
st.
|
81 |
for movie in recommendations:
|
82 |
-
st.write(f"- {movie}")
|
|
|
5 |
from collections import defaultdict
|
6 |
import kagglehub
|
7 |
|
8 |
+
# Step 1: Download the latest version of the dataset.
# kagglehub exposes dataset downloads as `dataset_download`; the package has
# no top-level `download` function, so the previous call failed at import time
# of this script with AttributeError.
dataset_ref = "ashpalsingh1525/imdb-movies-dataset"
path = kagglehub.dataset_download(dataset_ref)

# Define the dataset path (update filename if needed)
dataset_path = f"{path}/imdb_movies.csv"
|
14 |
|
15 |
+
# Step 2: Load the dataset from the CSV located at dataset_path.
df = pd.read_csv(dataset_path)

# Cast every categorical column to str in one pass so later string
# operations and equality filters behave uniformly.
categorical_columns = ['genre', 'orig_title', 'orig_lang', 'country', 'crew']
df = df.astype({name: str for name in categorical_columns})

# If every genre value is purely numeric, replace the codes with
# "Genre_<code>" labels.
genre_values = df['genre']
if genre_values.str.isnumeric().all():
    print("Warning: Genre column is numeric. Mapping needed.")
    genre_mapping = {code: f"Genre_{code}" for code in genre_values.unique()}
    df['genre'] = genre_values.map(genre_mapping)
|
28 |
|
29 |
+
# Step 3: Prepare the dataset for the Surprise library. The rating scale
# spans the observed range of the 'score' column.
score_column = df['score']
reader = Reader(rating_scale=(score_column.min(), score_column.max()))
# NOTE(review): Surprise reads the three columns as (user id, item id, rating)
# in that order, so 'orig_title' is treated as the user and 'orig_lang' as the
# item here - confirm this is intended.
data = Dataset.load_from_df(df[['orig_title', 'orig_lang', 'score']], reader)
|
32 |
|
|
|
35 |
# Fixed hyperparameters: 50 latent factors and a pinned seed for repeatable fits.
svd_params = {"n_factors": 50, "random_state": 42}
model = SVD(**svd_params)
model.fit(trainset)
|
37 |
|
38 |
+
# Step 4: Define functions for recommendation
|
39 |
def get_recommendations(selected_movies, genre, top_n=5):
    """Return the best-scoring titles in *genre* that were not already selected.

    Every distinct ``orig_title`` in the module-level ``df`` whose ``genre``
    equals *genre* is scored with ``model.predict`` and the highest estimates
    are returned, best first.

    Args:
        selected_movies: Titles the user already picked; they are excluded
            from the result.
        genre: Value compared for equality against the ``genre`` column.
        top_n: Maximum number of titles to return. Defaults to 5, the
            previously hard-coded limit.

    Returns:
        A list of at most *top_n* titles, or a one-element list holding a
        prompt message when *selected_movies* is empty.
    """
    if not selected_movies:
        return ["Please select at least one movie."]

    # Set membership keeps the exclusion test O(1) per candidate title.
    already_selected = set(selected_movies)
    candidate_titles = df.loc[df['genre'] == genre, 'orig_title'].unique()

    # NOTE(review): every prediction uses the fixed uid 'user', so
    # selected_movies only filters the output and does not personalise the
    # ranking - confirm this is intended.
    scored_titles = [
        (title, model.predict(uid='user', iid=title).est)
        for title in candidate_titles
        if title not in already_selected
    ]

    # list.sort() is stable, so equally scored titles keep their dataset order.
    scored_titles.sort(key=lambda pair: pair[1], reverse=True)
    return [title for title, _ in scored_titles[:max(top_n, 0)]]
|
54 |
+
|
55 |
+
def get_movies_by_genre(genre):
    """Return the distinct ``orig_title`` values of rows whose genre equals *genre*.

    Titles are read from the module-level ``df`` and returned as a plain list
    in order of first appearance.
    """
    genre_mask = df['genre'] == genre
    matching_titles = df.loc[genre_mask, 'orig_title']
    return matching_titles.unique().tolist()
|
57 |
|
58 |
+
# Step 5: Streamlit UI
st.title("🎬 Movie Recommendation System")

# Genre selection
genre_list = sorted(df['genre'].unique().tolist())
selected_genre = st.selectbox("Select a Genre", genre_list)

# Movie selection (dynamically updates based on genre)
movie_options = get_movies_by_genre(selected_genre)
# max_selections enforces the limit the label promises; previously any number
# of titles could be picked.
selected_movies = st.multiselect(
    "Select Up to 3 Movies",
    movie_options,
    max_selections=3,
)

# Recommendation button
if st.button("Get Recommendations"):
    if not selected_movies:
        # Show the prompt as a warning instead of listing it as if it were a
        # recommended title.
        st.warning("Please select at least one movie.")
    else:
        recommendations = get_recommendations(selected_movies, selected_genre)
        st.write("### Recommended Movies:")
        for movie in recommendations:
            st.write(f"- {movie}")
|