barakb21 committed on
Commit
97be09c
·
verified ·
1 Parent(s): ff6c1b4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from sentence_transformers import SentenceTransformer
7
+ from datasets import load_dataset
8
+
9
# === Load dataset from Hugging Face ===
# Only the "train" split is used; it is converted to a pandas DataFrame.
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Keep only relevant columns; rows missing a title or a plot are dropped and
# the index is renumbered from 0.
df = df[["title", "fullplot"]].dropna().reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")
# For a list of sentences, encode() returns a single 2-D array (one row per
# plot). Assigning a 2-D array to one DataFrame column fails in pandas, so
# the array is split into a list of 1-D vectors, one per row. This is the
# per-row layout that np.stack() in compute_similar_movies expects.
df["fullplot_embedding"] = list(
    model.encode(df["fullplot"].tolist(), show_progress_bar=True)
)
19
+
20
+ # === Helper Functions ===
21
def encode_input_text(text):
    """Embed a single piece of text with the module-level ``model``.

    The text is wrapped in a one-element list before being handed to
    ``model.encode``; the first (and only) entry of the result is returned.
    """
    batch = [text]
    embeddings = model.encode(batch)
    return embeddings[0]
23
+
24
def extract_keywords(text, min_length=5):
    """Return the set of lowercase words in *text* of at least *min_length* characters.

    A "word" is a run of ``\\w`` characters delimited by word boundaries, so
    digits and underscores count as word characters.

    Args:
        text: The text to scan. ``None`` or an empty string yields an empty set.
        min_length: Minimum number of characters a word needs to be kept.
            Defaults to 5, the threshold previously hard-coded here.

    Returns:
        A set of distinct lowercase words.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        raise ValueError("min_length must be at least 1")
    # Guard against empty/None input (e.g. an untouched textbox) instead of
    # failing on ``None.lower()``.
    if not text:
        return set()
    pattern = rf"\b\w{{{min_length},}}\b"
    return set(re.findall(pattern, text.lower()))
27
+
28
def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's text.

    Keywords are extracted from *movie_text* with ``extract_keywords`` and
    intersected with *user_keywords*. The shared words are returned sorted
    and comma-separated; if there are none, a fixed fallback message is
    returned instead.
    """
    shared = user_keywords & extract_keywords(movie_text)
    if not shared:
        return "No strong keyword match"
    return ", ".join(sorted(shared))
32
+
33
def compute_similar_movies(user_embedding, top_k=5):
    """Return the *top_k* movies whose plot embeddings are closest to *user_embedding*.

    Args:
        user_embedding: 1-D embedding vector of the user's query.
        top_k: Number of rows to return. Defaults to 5.

    Returns:
        A new DataFrame holding the columns of the module-level ``df`` plus a
        ``similarity`` column (cosine similarity to *user_embedding*), sorted
        from most to least similar and truncated to *top_k* rows.
    """
    movie_embeddings = np.stack(df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    # Attach the scores to a per-call copy rather than writing them into the
    # shared global ``df``: the UI can serve several requests at once, and a
    # shared "similarity" column could be overwritten by another request
    # between the assignment and the sort.
    ranked = df.assign(similarity=similarities).sort_values(
        "similarity", ascending=False
    )
    return ranked.head(top_k)
39
+
40
# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Movie Recommender Engine with Explanation")
    input_box = gr.Textbox(label="Enter your plot idea", placeholder="e.g., dreams within dreams")
    output_box = gr.Textbox(label="Top Recommendations", lines=8)
    recommend_button = gr.Button("Recommend")

    def recommend(text):
        """Build the recommendation text shown in the output box.

        Embeds the user's text, fetches the most similar movies and, for
        each one, lists its title, similarity score and the keywords it
        shares with the query. Entries are separated by a blank line.
        """
        # An empty query carries no meaning; embedding it would still yield
        # "recommendations", so ask for input instead.
        if not text or not text.strip():
            return "Please enter a plot idea to get recommendations."

        user_embedding = encode_input_text(text)
        user_keywords = extract_keywords(text)
        top_df = compute_similar_movies(user_embedding)

        results = [
            f"🎬 {title} (score={score:.2f})\n🔍 {generate_explanation(user_keywords, plot)}"
            for title, plot, score in zip(
                top_df["title"], top_df["fullplot"], top_df["similarity"]
            )
        ]
        return "\n\n".join(results)

    # Clicking the button runs recommend() on the textbox content and writes
    # the returned string to the output box.
    recommend_button.click(fn=recommend, inputs=[input_box], outputs=[output_box])

demo.launch()