"""Streamlit app: pick a genre and a few movies, get five similar-genre suggestions.

The dataset has no real user ids, so the original language of a movie is used
as a stand-in "user" for the Surprise SVD model and the movie title is the
item. Recommendations are the titles of the chosen genre with the highest
predicted score for the languages of the movies the visitor selected.
"""

from collections import defaultdict

import pandas as pd
import streamlit as st
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Maximum number of movies a visitor may select (also shown in the widget label).
MAX_SELECTED_MOVIES = 3
# Number of titles returned by get_recommendations.
TOP_N = 5
# Pseudo-user id used when none of the selected titles can be found in the
# dataset; it is deliberately not a language value, so the model treats it as
# an unseen user.
UNKNOWN_PROFILE = "__unknown_profile__"

# NOTE(review): Streamlit re-executes this script on widget interaction, so the
# download and the training below are repeated each time. Consider caching
# them (st.cache_data / st.cache_resource) if the deployed Streamlit version
# provides those decorators.

# Step 1: Load dataset from Hugging Face (raw file URL)
dataset_url = "https://huggingface.co/spaces/chrisaldikaraharja/MovieRecommendationEngine/resolve/main/imdb_movies.csv"

try:
    df = pd.read_csv(dataset_url)
except Exception as e:
    # Top-level boundary: report any load failure in the UI and halt the app.
    st.error(f"Failed to load dataset: {e}")
    st.stop()  # Stop the app if the dataset cannot be loaded
else:
    st.write("Dataset Loaded Successfully ✅")
    st.write(df.head())

# Convert categorical columns to string (avoids data type issues)
categorical_columns = ['genre', 'orig_title', 'orig_lang', 'country', 'crew']

# Fail with a readable message instead of a KeyError if the file layout changed.
missing_columns = [
    col for col in categorical_columns + ['score'] if col not in df.columns
]
if missing_columns:
    st.error(f"Dataset is missing required columns: {', '.join(missing_columns)}")
    st.stop()

for col in categorical_columns:
    df[col] = df[col].astype(str)

# Step 2: Prepare dataset for Surprise library (collaborative filtering)
# Surprise reads the three columns as (user, item, rating) in that order, so
# the title has to be the second column for the model to learn per-movie
# parameters. Rows without a score carry no rating and are left out.
ratings = df[['orig_lang', 'orig_title', 'score']].dropna(subset=['score'])
reader = Reader(rating_scale=(ratings['score'].min(), ratings['score'].max()))
data = Dataset.load_from_df(ratings, reader)

# Train model
# Fit on every rating: most titles appear only once, so holding rows out would
# leave those titles unknown to the model at recommendation time.
trainset = data.build_full_trainset()
model = SVD(n_factors=50, random_state=42)
model.fit(trainset)


# Step 3: Define recommendation functions
def get_movies_by_genre(genre: str) -> list[str]:
    """Return the unique titles whose ``genre`` value equals *genre* exactly.

    Titles are returned in the order they first appear in the dataset.
    """
    return df[df['genre'] == genre]['orig_title'].unique().tolist()


def get_recommendations(selected_movies: list[str], genre: str) -> list[str]:
    """Return up to ``TOP_N`` titles of *genre* ranked by predicted score.

    Args:
        selected_movies: Titles the visitor picked; they are excluded from the
            result and their original languages act as the user profile.
        genre: Exact value of the ``genre`` column to recommend from.

    Returns:
        The best-scoring titles, highest first. If *selected_movies* is empty,
        a single-element list holding a prompt message is returned instead.
    """
    if not selected_movies:
        return ["Please select at least one movie."]

    already_chosen = set(selected_movies)
    candidates = [
        title for title in get_movies_by_genre(genre) if title not in already_chosen
    ]

    # The model's "users" are languages, so query it with the languages of the
    # selected movies rather than an id it has never seen.
    profile_langs = (
        df.loc[df['orig_title'].isin(already_chosen), 'orig_lang'].unique().tolist()
    )
    if not profile_langs:
        profile_langs = [UNKNOWN_PROFILE]

    # Summing over the profile languages ranks the same as averaging, because
    # every candidate is divided by the same count.
    movie_scores = defaultdict(float)
    for lang in profile_langs:
        for title in candidates:
            movie_scores[title] += model.predict(uid=lang, iid=title).est

    ranked = sorted(candidates, key=lambda title: movie_scores[title], reverse=True)
    return ranked[:TOP_N]


# Step 4: Streamlit UI
st.title("🎬 Movie Recommendation System")

# Genre selection
genre_list = sorted(df['genre'].unique().tolist())
selected_genre = st.selectbox("Select a Genre", genre_list)

# Movie selection (dynamically updates based on genre)
movie_options = get_movies_by_genre(selected_genre)
selected_movies = st.multiselect(
    f"Select Up to {MAX_SELECTED_MOVIES} Movies", movie_options
)

# Recommendation button
if st.button("Get Recommendations"):
    if len(selected_movies) > MAX_SELECTED_MOVIES:
        # The label promises a limit; enforce it instead of silently ignoring it.
        st.warning(f"Please select at most {MAX_SELECTED_MOVIES} movies.")
    else:
        recommendations = get_recommendations(selected_movies, selected_genre)
        st.write("### Recommended Movies:")
        for movie in recommendations:
            st.write(f"- {movie}")