Spaces:
Sleeping
Sleeping
Commit
·
377bcfc
1
Parent(s):
70b325b
Pushing code
Browse files- app.py +66 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
+
from sklearn.metrics.pairwise import linear_kernel
|
5 |
+
from datasets import load_dataset
|
6 |
+
|
7 |
+
# Load dataset from Hugging Face Datasets, clean it and build the similarity
# matrix. Streamlit re-executes this script from the top on every widget
# interaction, so the expensive work is wrapped in a cached function and
# performed only once per server process.
@st.cache_data
def _prepare_movie_data():
    """Load the IMDB top-1000 dataset and derive the TF-IDF similarity matrix.

    Returns:
        A tuple ``(movies, vectorizer, overview_matrix, similarity)`` where
        ``movies`` is the cleaned DataFrame, ``vectorizer`` is the fitted
        ``TfidfVectorizer``, ``overview_matrix`` is the TF-IDF matrix of the
        ``Overview`` column, and ``similarity`` is the pairwise
        ``linear_kernel`` of that matrix with itself.
    """
    dataset = load_dataset("charizdiannefalco/imdb_top_1000")
    movies = pd.DataFrame(dataset["train"])

    # Data cleaning.
    # Missing overviews become empty strings so the vectorizer can consume them.
    movies["Overview"] = movies["Overview"].fillna("")
    # Missing gross values become "0"; "$" and "," are stripped before the
    # column is converted to float.
    movies["Gross"] = movies["Gross"].fillna("0")
    movies["Gross"] = (
        movies["Gross"]
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

    # TF-IDF Vectorization
    vectorizer = TfidfVectorizer(stop_words="english")
    overview_matrix = vectorizer.fit_transform(movies["Overview"])

    # Calculate Cosine Similarity
    # NOTE(review): linear_kernel equals cosine similarity only if the TF-IDF
    # rows are L2-normalised (believed to be the TfidfVectorizer default) -
    # confirm against the scikit-learn docs.
    similarity = linear_kernel(overview_matrix, overview_matrix)

    return movies, vectorizer, overview_matrix, similarity


df, tfidf, tfidf_matrix, cosine_sim = _prepare_movie_data()
|
22 |
+
|
23 |
+
# Function to get movie recommendations
|
24 |
+
def get_recommendations(title, cosine_sim=cosine_sim, df = df, top_n=3):
    """Return the movies most similar to ``title``.

    Args:
        title: Value to look up in the ``Series_Title`` column of ``df``.
        cosine_sim: Square similarity matrix whose row/column ``i``
            corresponds to the ``i``-th row (by position) of ``df``.
        df: DataFrame of movies containing a ``Series_Title`` column.
        top_n: Maximum number of recommendations to return (default 3).

    Returns:
        A DataFrame with up to ``top_n`` rows of ``df``, ordered from most to
        least similar and never containing the looked-up row itself. If
        ``title`` is not present in ``df`` an empty DataFrame with the same
        columns is returned instead of raising ``IndexError``.
    """
    # Locate the movie by *position*, not by index label: both cosine_sim and
    # df.iloc below are positional, so this stays correct even when df does
    # not carry a default RangeIndex.
    positions = (df["Series_Title"] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return df.iloc[0:0]
    idx = int(positions[0])

    scores = cosine_sim[idx]
    # Exclude the input movie itself, then rank the rest by descending score.
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(
        (i for i in range(len(scores)) if i != idx),
        key=lambda i: scores[i],
        reverse=True,
    )
    return df.iloc[ranked[:max(top_n, 0)]]
|
33 |
+
|
34 |
+
# Streamlit App
st.title("Movie Recommendation System")

user_movies = st.multiselect("Enter movies you like:", df["Series_Title"].tolist())

if st.button("Get Recommendations"):
    if user_movies:
        max_results = 3

        # Collect the per-movie recommendations and tag each row with its rank
        # within its own list (0 = best match for that movie).
        per_movie = []
        for movie in user_movies:
            recs = get_recommendations(movie)
            per_movie.append(recs.assign(_rank=range(len(recs))))

        # Concatenate once, then order by rank with a stable sort so every
        # selected movie contributes its best match before any movie
        # contributes its second-best. Without this, truncating to the first
        # few rows would only ever show results for the first selected movie.
        all_recommendations = pd.concat(per_movie).sort_values("_rank", kind="stable")

        # Remove input movies from recommendations.
        is_input = all_recommendations["Series_Title"].isin(user_movies)
        all_recommendations = all_recommendations[~is_input]

        all_recommendations = all_recommendations.drop_duplicates(subset=["Series_Title"])

        # head() already copes with frames shorter than max_results.
        recommendations_df = all_recommendations.head(max_results)

        if recommendations_df.empty:
            # Every candidate was itself one of the selected movies.
            st.info("No recommendations found for the selected movies.")
        else:
            st.write("Recommended Movies:")
            for _, row in recommendations_df.iterrows():
                st.image(row["Poster_Link"], width=150)
                st.write(f"**Title:** {row['Series_Title']}")
                st.write(f"**Released Year:** {row['Released_Year']}")
                st.write(f"**Runtime:** {row['Runtime']}")
                st.write(f"**Genre:** {row['Genre']}")
                st.write(f"**Overview:** {row['Overview']}")
                st.write(f"**Director:** {row['Director']}")
                st.write("---")
    else:
        st.warning("Please enter at least one movie.")
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
scikit-learn
|
3 |
+
pandas
|
4 |
+
transformers
|
5 |
+
datasets
|