Spaces:

Tesneem
/

Netflix_Recommendation

Running

App Files Files Community

Tesneem committed on Nov 8, 2024

Commit

3331cdd

verified ·

1 Parent(s): 61804bb

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -78

app.py CHANGED Viewed

@@ -1,22 +1,76 @@
-import gradio as gr
-# def greet(name):
-#     return "Hello " + name + "!!"
-from sentence_transformers import SentenceTransformer
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
-from datasets import load_dataset
-# Load pre-trained SentenceTransformer model
-embedding_model = SentenceTransformer("thenlper/gte-large")
-# # Example dataset with genres (replace with your actual data)
 # dataset = load_dataset("hugginglearners/netflix-shows")
-# dataset = dataset.filter(lambda x: x['description'] is not None and x['listed_in'] is not None and x['title'] is not None)
-# data = dataset['train']  # Accessing the 'train' split of the dataset
-# # Convert the dataset to a list of dictionaries for easier indexing
-# data_list = list[data]
-# print(data_list)
 # # Combine description and genre for embedding
 # def combine_description_title_and_genre(description, listed_in, title):
 #     return f"{description} Genre: {listed_in} Title: {title}"
@@ -29,80 +83,60 @@ embedding_model = SentenceTransformer("thenlper/gte-large")
 # def vector_search(query):
 #     query_embedding = get_embedding(query)
-#     # Generate embeddings for the combined description and genre
-#     embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list[0]])
-#     # Calculate cosine similarity between the query and all embeddings
-#     similarities = cosine_similarity([query_embedding], embeddings)
-# Load dataset (using the correct dataset identifier for your case)
-dataset = load_dataset("hugginglearners/netflix-shows")
-# Combine description and genre for embedding
-def combine_description_title_and_genre(description, listed_in, title):
-    return f"{description} Genre: {listed_in} Title: {title}"
-# Generate embedding for the query
-def get_embedding(text):
-    return embedding_model.encode(text)
-# Vector search function
-def vector_search(query):
-    query_embedding = get_embedding(query)
-    # Function to generate embeddings for each item in the dataset
-    def generate_embeddings(example):
-        return {
-            'embedding': get_embedding(combine_description_title_and_genre(example["description"], example["listed_in"], example["title"]))
-        }
-    # Generate embeddings for the dataset using map
-    embeddings_dataset = dataset["train"].map(generate_embeddings)
-    # Extract embeddings
-    embeddings = np.array([embedding['embedding'] for embedding in embeddings_dataset])
-    # Calculate cosine similarity between the query and all embeddings
-    similarities = cosine_similarity([query_embedding], embeddings)
-    # # Adjust similarity scores based on ratings
-    # ratings = np.array([item["rating"] for item in data_list])
-    # adjusted_similarities = similarities * ratings.reshape(-1, 1)
-     # Get top N most similar items (e.g., top 3)
-    top_n = 3
-    top_indices = similarities[0].argsort()[-top_n:][::-1]  # Get indices of the top N results
-    top_items = [dataset["train"][i] for i in top_indices]
-    # Format the output for display
-    search_result = ""
-    for item in top_items:
-        search_result += f"Title: {item['title']}, Description: {item['description']}, Genre: {item['listed_in']}\n"
-    return search_result
-# Gradio Interface
-def movie_search(query):
-    return vector_search(query)
-with gr.Blocks() as demo:
-    gr.Markdown("# Netflix Recommendation System")
-    gr.Markdown("Enter a query to receive Netflix show recommendations based on title, description, and genre.")
-    query = gr.Textbox(label="Enter your query")
-    output = gr.Textbox(label="Recommendations")
-    submit_button = gr.Button("Submit")
-    submit_button.click(fn=movie_search, inputs=query, outputs=output)
-demo.launch()
-# iface = gr.Interface(fn=movie_search,
-#                      inputs=gr.inputs.Textbox(label="Enter your query"),
-#                      outputs="text",
-#                      live=True,
-#                      title="Netflix Recommendation System",
-#                      description="Enter a query to get Netflix recommendations based on description and genre.")
-# iface.launch()
-# demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-# demo.launch()

 import numpy as np
+import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
+# Load embeddings and metadata
+embeddings = np.load("path/to/netflix_embeddings.npy")
+metadata = pd.read_csv("path/to/netflix_metadata.csv")
+# Vector search function
+def vector_search(query, model):
+    query_embedding = model.encode(query)
+    similarities = cosine_similarity([query_embedding], embeddings)[0]
+    top_n = 3
+    top_indices = similarities.argsort()[-top_n:][::-1]
+    results = metadata.iloc[top_indices]
+    # Format results for display
+    result_text = "\n".join(f"Title: {row['title']}, Description: {row['description']}, Genre: {row['listed_in']}" for _, row in results.iterrows())
+    return result_text
+# Gradio Interface
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+model = SentenceTransformer("thenlper/gte-large")
+with gr.Blocks() as demo:
+    query = gr.Textbox(label="Enter your query")
+    output = gr.Textbox(label="Recommendations")
+    submit_button = gr.Button("Submit")
+    submit_button.click(fn=lambda q: vector_search(q, model), inputs=query, outputs=output)
+demo.launch()
+# import gradio as gr
+# # def greet(name):
+# #     return "Hello " + name + "!!"
+# from sentence_transformers import SentenceTransformer
+# import numpy as np
+# from sklearn.metrics.pairwise import cosine_similarity
+# from datasets import load_dataset
+# # Load pre-trained SentenceTransformer model
+# embedding_model = SentenceTransformer("thenlper/gte-large")
+# # # Example dataset with genres (replace with your actual data)
+# # dataset = load_dataset("hugginglearners/netflix-shows")
+# # dataset = dataset.filter(lambda x: x['description'] is not None and x['listed_in'] is not None and x['title'] is not None)
+# # data = dataset['train']  # Accessing the 'train' split of the dataset
+# # # Convert the dataset to a list of dictionaries for easier indexing
+# # data_list = list[data]
+# # print(data_list)
+# # # Combine description and genre for embedding
+# # def combine_description_title_and_genre(description, listed_in, title):
+# #     return f"{description} Genre: {listed_in} Title: {title}"
+# # # Generate embedding for the query
+# # def get_embedding(text):
+# #     return embedding_model.encode(text)
+# # # Vector search function
+# # def vector_search(query):
+# #     query_embedding = get_embedding(query)
+# #     # Generate embeddings for the combined description and genre
+# #     embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list[0]])
+# #     # Calculate cosine similarity between the query and all embeddings
+# #     similarities = cosine_similarity([query_embedding], embeddings)
+# # Load dataset (using the correct dataset identifier for your case)
 # dataset = load_dataset("hugginglearners/netflix-shows")
 # # Combine description and genre for embedding
 # def combine_description_title_and_genre(description, listed_in, title):
 #     return f"{description} Genre: {listed_in} Title: {title}"
 # def vector_search(query):
 #     query_embedding = get_embedding(query)
+#     # Function to generate embeddings for each item in the dataset
+#     def generate_embeddings(example):
+#         return {
+#             'embedding': get_embedding(combine_description_title_and_genre(example["description"], example["listed_in"], example["title"]))
+#         }
+#     # Generate embeddings for the dataset using map
+#     embeddings_dataset = dataset["train"].map(generate_embeddings)
+#     # Extract embeddings
+#     embeddings = np.array([embedding['embedding'] for embedding in embeddings_dataset])
+#     # Calculate cosine similarity between the query and all embeddings
+#     similarities = cosine_similarity([query_embedding], embeddings)
+#     # # Adjust similarity scores based on ratings
+#     # ratings = np.array([item["rating"] for item in data_list])
+#     # adjusted_similarities = similarities * ratings.reshape(-1, 1)
+#      # Get top N most similar items (e.g., top 3)
+#     top_n = 3
+#     top_indices = similarities[0].argsort()[-top_n:][::-1]  # Get indices of the top N results
+#     top_items = [dataset["train"][i] for i in top_indices]
+#     # Format the output for display
+#     search_result = ""
+#     for item in top_items:
+#         search_result += f"Title: {item['title']}, Description: {item['description']}, Genre: {item['listed_in']}\n"
+#     return search_result
+# # Gradio Interface
+# def movie_search(query):
+#     return vector_search(query)
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Netflix Recommendation System")
+#     gr.Markdown("Enter a query to receive Netflix show recommendations based on title, description, and genre.")
+#     query = gr.Textbox(label="Enter your query")
+#     output = gr.Textbox(label="Recommendations")
+#     submit_button = gr.Button("Submit")
+#     submit_button.click(fn=movie_search, inputs=query, outputs=output)
+# demo.launch()
+# # iface = gr.Interface(fn=movie_search,
+# #                      inputs=gr.inputs.Textbox(label="Enter your query"),
+# #                      outputs="text",
+# #                      live=True,
+# #                      title="Netflix Recommendation System",
+# #                      description="Enter a query to get Netflix recommendations based on description and genre.")
+# # iface.launch()
+# # demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+# # demo.launch()