# chrisaldikaraharja's picture
# Update app.py
# 17680ba verified
import pandas as pd
import streamlit as st
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict
# Step 1: Load dataset from Hugging Face (raw file URL)
dataset_url = "https://huggingface.co/spaces/chrisaldikaraharja/MovieRecommendationEngine/resolve/main/imdb_movies.csv"

try:
    # Only the download/parse call lives inside the try, so that a problem
    # while rendering the preview below is not misreported as a load failure.
    df = pd.read_csv(dataset_url)
except Exception as e:  # app-level boundary: report any load error to the user
    st.error(f"Failed to load dataset: {e}")
    st.stop()  # Stop the app if the dataset cannot be loaded
else:
    # The check mark was mis-encoded ("βœ…") in the original message.
    st.write("Dataset Loaded Successfully ✅")
    st.write(df.head())
# Convert categorical columns to string (avoids data type issues).
# Missing values are filled first: astype(str) on its own turns NaN into the
# literal text "nan", which would then be treated as a real genre/title value
# by everything downstream that reads these columns.
categorical_columns = ['genre', 'orig_title', 'orig_lang', 'country', 'crew']
for col in categorical_columns:
    df[col] = df[col].fillna("Unknown").astype(str)
# Step 2: Prepare dataset for Surprise library (collaborative filtering)
# NOTE: Dataset.load_from_df reads its three columns as (user id, item id,
# rating), so the fitted model treats 'orig_title' as the user id and
# 'orig_lang' as the item id. Any predict() call must use the same roles.
# Rows without a score carry no rating, so they are dropped before training.
ratings = df[['orig_title', 'orig_lang', 'score']].dropna(subset=['score'])
reader = Reader(rating_scale=(ratings['score'].min(), ratings['score'].max()))
data = Dataset.load_from_df(ratings, reader)

# Train model
# NOTE(review): testset is not used by any code visible in this file; the
# split is kept so the module-level names stay available.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)


@st.cache_resource
def _fit_model(_trainset):
    """Fit the SVD model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching the model would be retrained each time.

    Args:
        _trainset: Surprise trainset to fit on. The leading underscore tells
            Streamlit not to hash this argument, so the cached model is
            reused regardless of the trainset passed in (the dataset URL in
            this app is fixed).

    Returns:
        The fitted SVD model.
    """
    fitted = SVD(n_factors=50, random_state=42)
    fitted.fit(_trainset)
    return fitted


model = _fit_model(trainset)
# Step 3: Define recommendation function
def get_recommendations(selected_movies, genre, top_n=5):
    """Return the highest-scoring titles of a genre, excluding the user's picks.

    The model is fitted on (orig_title, orig_lang, score) triples, i.e. the
    title plays the "user" role and the language the "item" role. Predictions
    are therefore requested as ``uid=<title>, iid=<language>``. Querying with
    a constant uid and the title as iid (as the code previously did) uses ids
    the model was never trained on, so every movie received the same fallback
    estimate and the ranking carried no information.

    Note that ``selected_movies`` is only used to exclude titles from the
    result; it does not personalise the scores.

    Args:
        selected_movies: Titles the user already picked. If empty, a
            single-element list with a prompt message is returned.
        genre: Exact value of the 'genre' column to filter on.
        top_n: Maximum number of titles to return (default 5).

    Returns:
        A list of up to ``top_n`` titles ordered by estimated score, highest
        first, or ``["Please select at least one movie."]`` when nothing was
        selected.
    """
    if not selected_movies:
        return ["Please select at least one movie."]

    # Set membership keeps the exclusion test O(1) per candidate.
    already_picked = set(selected_movies)

    # One row per title (first occurrence wins), keeping the language that the
    # model needs as the item id.
    candidates = df.loc[df['genre'] == genre, ['orig_title', 'orig_lang']]
    candidates = candidates.drop_duplicates(subset='orig_title')

    scored = [
        (title, model.predict(uid=title, iid=lang).est)
        for title, lang in zip(candidates['orig_title'], candidates['orig_lang'])
        if title not in already_picked
    ]
    # list.sort is stable, so ties keep their order of appearance in df.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [title for title, _ in scored[:top_n]]
def get_movies_by_genre(genre):
    """List the distinct original titles whose 'genre' equals *genre*.

    Titles are returned in order of first appearance in ``df``.
    """
    in_genre = df['genre'] == genre
    titles = df.loc[in_genre, 'orig_title']
    return titles.unique().tolist()
# Step 4: Streamlit UI
st.title("🎬 Movie Recommendation System")

# Genre selection
genre_list = sorted(df['genre'].unique().tolist())
selected_genre = st.selectbox("Select a Genre", genre_list)

# Movie selection (dynamically updates based on genre)
movie_options = get_movies_by_genre(selected_genre)
# max_selections enforces the limit that the label promises.
selected_movies = st.multiselect(
    "Select Up to 3 Movies", movie_options, max_selections=3
)

# Recommendation button
if st.button("Get Recommendations"):
    if not selected_movies:
        # Show the prompt as a warning instead of rendering it as if it were
        # a recommended movie under the results heading.
        st.warning("Please select at least one movie.")
    else:
        recommendations = get_recommendations(selected_movies, selected_genre)
        st.write("### Recommended Movies:")
        if recommendations:
            for movie in recommendations:
                st.write(f"- {movie}")
        else:
            # Every title of the genre was already selected (or none exist).
            st.write("No other movies found in this genre.")