# Spaces:
# Sleeping
# Sleeping
import streamlit as st | |
import pandas as pd | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import linear_kernel | |
from datasets import load_dataset | |
# Load the data and build the similarity model once per server process.
# Streamlit re-executes this script from top to bottom on every widget
# interaction; without caching, each click would reload the dataset and
# refit TF-IDF before anything is rendered.
@st.cache_resource
def _load_movie_data():
    """Load the movie dataset and build the overview-similarity model.

    Returns:
        A ``(df, tfidf, tfidf_matrix, cosine_sim)`` tuple: the cleaned movie
        table, the fitted vectorizer, the TF-IDF matrix of the ``Overview``
        column, and the pairwise similarity matrix between overviews.

    The returned objects are shared between reruns and sessions
    (``st.cache_resource`` does not copy them), so callers must treat them
    as read-only.
    """
    # Load dataset from Hugging Face Datasets
    dataset = load_dataset("charizdiannefalco/imdb_top_1000")
    frame = pd.DataFrame(dataset["train"])

    # Data cleaning: missing overviews become empty documents; Gross is
    # stripped of "$" and thousands separators, then parsed as float.
    frame["Overview"] = frame["Overview"].fillna("")
    frame["Gross"] = frame["Gross"].fillna("0")
    frame["Gross"] = (
        frame["Gross"]
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

    # TF-IDF Vectorization
    vectorizer = TfidfVectorizer(stop_words="english")
    matrix = vectorizer.fit_transform(frame["Overview"])

    # Calculate Cosine Similarity. TfidfVectorizer L2-normalises rows by
    # default, so the plain dot product computed by linear_kernel is the
    # cosine similarity.
    similarity = linear_kernel(matrix, matrix)
    return frame, vectorizer, matrix, similarity


df, tfidf, tfidf_matrix, cosine_sim = _load_movie_data()
# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, df=df, top_n=3):
    """Return the movies whose overviews are most similar to ``title``.

    Args:
        title: Value to look up in the ``Series_Title`` column of ``df``.
            If several rows share the title, the first one is used.
        cosine_sim: Square similarity matrix; row/column ``i`` is read as
            the movie at *position* ``i`` of ``df``.
        df: Movie table containing a ``Series_Title`` column.
        top_n: Maximum number of rows to return. Defaults to 3, the value
            that used to be hard-coded.

    Returns:
        Up to ``top_n`` rows of ``df`` ordered from most to least similar.
        The looked-up row itself is never included.

    Raises:
        IndexError: If no row of ``df`` has ``Series_Title == title``.
    """
    # Work with the row *position*, not the index label: the value is used
    # to index ``cosine_sim`` and ``df.iloc``, both of which are positional.
    # Using the label only happens to work when ``df`` has a default
    # RangeIndex.
    positions = (df["Series_Title"] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"No movie titled {title!r} in the dataset")
    idx = int(positions[0])

    # sorted() is stable, so ties keep their original dataset order.
    ranked = sorted(
        enumerate(cosine_sim[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the input movie itself
    movie_indices = [pos for pos, _ in ranked if pos != idx]
    # Clamp so a negative top_n yields an empty result instead of slicing
    # from the end of the ranking.
    return df.iloc[movie_indices[:max(top_n, 0)]]
# Streamlit App
st.title("Movie Recommendation System")
user_movies = st.multiselect("Enter movies you like:", df["Series_Title"].tolist())

if st.button("Get Recommendations"):
    if user_movies:
        # Gather the per-movie results first and concatenate once, instead
        # of growing a DataFrame with pd.concat inside the loop.
        all_recommendations = pd.concat(
            [get_recommendations(movie) for movie in user_movies]
        )
        # Remove input movies from recommendations.
        all_recommendations = all_recommendations[
            ~all_recommendations["Series_Title"].isin(user_movies)
        ]
        all_recommendations = all_recommendations.drop_duplicates(
            subset=["Series_Title"]
        )
        # head(3) already returns the whole frame when it has 3 rows or
        # fewer, so no explicit length check is needed.
        recommendations_df = all_recommendations.head(3)

        if recommendations_df.empty:
            # Every candidate was one of the selected movies; say so rather
            # than showing a header with nothing under it.
            st.info("No recommendations found for the selected movies.")
        else:
            st.write("Recommended Movies:")
            for _, row in recommendations_df.iterrows():
                st.image(row["Poster_Link"], width=150)
                st.write(f"**Title:** {row['Series_Title']}")
                st.write(f"**Released Year:** {row['Released_Year']}")
                st.write(f"**Runtime:** {row['Runtime']}")
                st.write(f"**Genre:** {row['Genre']}")
                st.write(f"**Overview:** {row['Overview']}")
                st.write(f"**Director:** {row['Director']}")
                st.write("---")
    else:
        st.warning("Please enter at least one movie.")