charizdiannefalco committed on
Commit
377bcfc
·
1 Parent(s): 70b325b

Pushing code

Browse files
Files changed (2) hide show
  1. app.py +66 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import linear_kernel
5
+ from datasets import load_dataset
6
+
7
# Load the dataset and build the similarity model.
# Streamlit re-executes this whole script on every widget interaction, so the
# expensive steps (dataset load, TF-IDF fit, pairwise similarity) live in a
# cached function and run only once per server process.
@st.cache_resource
def _load_recommender_data():
    """Load the movie dataset and build the TF-IDF similarity model.

    Returns:
        A tuple ``(dataset, df, tfidf, tfidf_matrix, cosine_sim)`` where
        ``dataset`` is the object returned by ``load_dataset``, ``df`` is the
        cleaned ``train`` split as a DataFrame, ``tfidf`` is the fitted
        vectorizer, ``tfidf_matrix`` is the vectorized ``Overview`` column and
        ``cosine_sim`` is ``linear_kernel(tfidf_matrix, tfidf_matrix)``.

    Note:
        ``st.cache_resource`` hands back the same objects on every rerun
        (no copies), so callers must treat them as read-only.
    """
    dataset = load_dataset("charizdiannefalco/imdb_top_1000")
    df = pd.DataFrame(dataset["train"])

    # Data cleaning.
    df["Overview"] = df["Overview"].fillna("")
    # Cast to str before using the .str accessor: on non-string values the
    # accessor yields NaN instead of raising, which would silently lose data.
    df["Gross"] = (
        df["Gross"]
        .fillna("0")
        .astype(str)
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

    # TF-IDF vectorization of the plot overviews.
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(df["Overview"])

    # TfidfVectorizer L2-normalizes rows by default, so the plain dot product
    # computed by linear_kernel is the cosine similarity.
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    return dataset, df, tfidf, tfidf_matrix, cosine_sim


dataset, df, tfidf, tfidf_matrix, cosine_sim = _load_recommender_data()
22
+
23
# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, df=df, top_n=3):
    """Return the rows of ``df`` most similar to the movie named ``title``.

    Args:
        title: Value to look up in the ``Series_Title`` column. When several
            rows share the title, the first one (by position) is used.
        cosine_sim: Pairwise similarity matrix; row ``i`` must hold the
            similarities of the ``i``-th row of ``df`` (by position) to every
            other row.
        df: DataFrame containing a ``Series_Title`` column.
        top_n: Maximum number of recommendations to return (default 3).

    Returns:
        A DataFrame with up to ``top_n`` rows of ``df``, ordered from most to
        least similar, never including the queried row itself.

    Raises:
        IndexError: If no row of ``df`` has ``Series_Title == title``.
    """
    # Resolve the row by *position*, not by index label: cosine_sim and
    # df.iloc are both positional, so a non-default index on df must not
    # leak into the lookup.
    matches = (df["Series_Title"] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie titled {title!r} found in the dataset")
    idx = int(matches[0])

    # Drop the queried movie, then rank the rest by similarity. list.sort is
    # stable, so ties keep their original dataset order.
    scored = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    limit = max(int(top_n), 0)
    return df.iloc[[position for position, _ in scored[:limit]]]
33
+
34
# Streamlit App
st.title("Movie Recommendation System")

user_movies = st.multiselect("Enter movies you like:", df["Series_Title"].tolist())

if st.button("Get Recommendations"):
    if not user_movies:
        st.warning("Please enter at least one movie.")
    else:
        # Gather the per-movie recommendations and concatenate once; growing a
        # DataFrame inside the loop re-copies it on every iteration and an
        # empty seed frame triggers a pandas FutureWarning.
        all_recommendations = pd.concat(
            [get_recommendations(movie) for movie in user_movies]
        )

        # Remove input movies from recommendations.
        is_input_movie = all_recommendations["Series_Title"].isin(user_movies)
        all_recommendations = all_recommendations[~is_input_movie]

        all_recommendations = all_recommendations.drop_duplicates(subset=["Series_Title"])

        # head(3) already returns the whole frame when it has 3 rows or fewer.
        recommendations_df = all_recommendations.head(3)

        st.write("Recommended Movies:")
        if recommendations_df.empty:
            # Everything was filtered out (e.g. the selected movies only
            # recommend each other); tell the user instead of showing nothing.
            st.info("No recommendations found for the selected movies.")

        for _, row in recommendations_df.iterrows():
            poster = row["Poster_Link"]
            # Skip the image when the poster link is missing rather than
            # passing a non-string value to st.image.
            if isinstance(poster, str) and poster:
                st.image(poster, width=150)
            st.write(f"**Title:** {row['Series_Title']}")
            st.write(f"**Released Year:** {row['Released_Year']}")
            st.write(f"**Runtime:** {row['Runtime']}")
            st.write(f"**Genre:** {row['Genre']}")
            st.write(f"**Overview:** {row['Overview']}")
            st.write(f"**Director:** {row['Director']}")
            st.write("---")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ pandas
4
+ transformers
5
+ datasets