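# Page-header residue from the hosting site (author avatar "charizdiannefalco", commit message "Pushing code", commit 377bcfc); not part of the program.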
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from datasets import load_dataset
# Fetch the "charizdiannefalco/imdb_top_1000" dataset and materialise its
# "train" split as a pandas DataFrame.
# NOTE(review): Streamlit is documented to re-run the script on every widget
# interaction, so this whole pipeline is presumably recomputed each time;
# consider wrapping it in a cached function (st.cache_data) -- confirm.
dataset = load_dataset("charizdiannefalco/imdb_top_1000")
df = pd.DataFrame(dataset["train"])

# Data cleaning: missing overviews become empty strings; missing gross values
# become "0", then "$" and "," are stripped before the column is cast to float.
df["Overview"] = df["Overview"].fillna("")
df["Gross"] = (
    df["Gross"]
    .fillna("0")
    .str.replace("$", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(float)
)

# TF-IDF vectorisation of the overview text, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["Overview"])

# Pairwise similarity between every two overviews. TfidfVectorizer
# L2-normalises rows by default, so the plain dot product computed by
# linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)
# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, df=df, top_n=3):
    """Return the rows of ``df`` most similar to the movie named ``title``.

    Args:
        title: Value to look up in the ``Series_Title`` column of ``df``.
            If several rows share the title, the first one is used.
        cosine_sim: Similarity matrix indexed by row position; row ``i`` is
            read as the similarity of ``df.iloc[i]`` to every other row.
        df: Movie table containing a ``Series_Title`` column.
        top_n: Maximum number of rows to return (default 3).

    Returns:
        A DataFrame with at most ``top_n`` rows of ``df``, ordered from most
        to least similar. The row matched by ``title`` is never included.

    Raises:
        IndexError: If no row of ``df`` has the given title.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Use the row *position*, not the index label: both cosine_sim[...] and
    # df.iloc[...] are positional, and labels only coincide with positions
    # when df carries a default 0..n-1 index.
    positions = (df["Series_Title"] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"No movie titled {title!r} in the dataset")
    pos = int(positions[0])

    # Drop the query movie itself, then rank the rest by descending score.
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(
        (pair for pair in enumerate(cosine_sim[pos]) if pair[0] != pos),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return df.iloc[[row_pos for row_pos, _ in ranked[:top_n]]]
# Streamlit App: let the user pick movies they like, then show up to three
# recommendations drawn from the per-movie results of get_recommendations().
st.title("Movie Recommendation System")
user_movies = st.multiselect("Enter movies you like:", df["Series_Title"].tolist())

if st.button("Get Recommendations"):
    if user_movies:
        # Concatenate once instead of growing a frame inside a loop: this
        # avoids repeated copying and avoids seeding the concat with an
        # empty DataFrame.
        all_recommendations = pd.concat(
            [get_recommendations(movie) for movie in user_movies]
        )
        # Remove input movies from recommendations, then collapse titles that
        # were recommended for more than one selected movie.
        all_recommendations = all_recommendations[
            ~all_recommendations["Series_Title"].isin(user_movies)
        ]
        all_recommendations = all_recommendations.drop_duplicates(
            subset=["Series_Title"]
        )
        # head(3) already returns the whole frame when it has fewer than
        # three rows, so no length check is needed.
        # NOTE(review): rows stay in selection order, so with several selected
        # movies the three shown come mostly from the first one; consider
        # ranking by combined similarity instead.
        recommendations_df = all_recommendations.head(3)

        if recommendations_df.empty:
            st.info("No recommendations found for the selected movies.")
        else:
            st.write("Recommended Movies:")
            for _, row in recommendations_df.iterrows():
                st.image(row["Poster_Link"], width=150)
                st.write(f"**Title:** {row['Series_Title']}")
                st.write(f"**Released Year:** {row['Released_Year']}")
                st.write(f"**Runtime:** {row['Runtime']}")
                st.write(f"**Genre:** {row['Genre']}")
                st.write(f"**Overview:** {row['Overview']}")
                st.write(f"**Director:** {row['Director']}")
                st.write("---")
    else:
        st.warning("Please enter at least one movie.")