import time

import numpy as np
import pandas as pd
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel

# Model and tokenizer are loaded once at import time from the 'model' and
# 'tokenizer' locations and shared by every call to `recommendation`.
loaded_model = BertModel.from_pretrained('model')
loaded_tokenizer = BertTokenizer.from_pretrained('tokenizer')
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def filter_by_ganre(df: pd.DataFrame, ganre_list: list) -> pd.DataFrame:
    """Return the rows of *df* whose 'ganres' value shares an item with *ganre_list*.

    Each cell of the 'ganres' column is iterated, so it is presumably a
    list-like of genre names (TODO confirm against the data loader; a plain
    string cell would be iterated character by character).

    Args:
        df: Frame containing a 'ganres' column of iterables.
        ganre_list: Genres to keep; a row matches if any of its genres is
            contained in this list.

    Returns:
        The matching subset of *df*, with the original index preserved.
    """
    matches = df['ganres'].apply(
        lambda row_ganres: any(g in ganre_list for g in row_ganres)
    )
    return df[matches]


def recommendation(df: pd.DataFrame, embeddings: np.ndarray, user_text: str, n=10) -> dict:
    """Rank precomputed embeddings by cosine similarity to *user_text*.

    The text is encoded with the module-level BERT model; its embedding is
    the mean of the last hidden state over the token axis.

    Args:
        df: Not used by the current implementation; kept so existing callers
            keep working.
        embeddings: 2-D array with one embedding per row, in the same vector
            space as the model output (presumably row i corresponds to row i
            of *df* -- verify against the caller).
        user_text: Query text to embed.
        n: Number of best matches to return.

    Returns:
        Dict mapping the positional row index in *embeddings* to its cosine
        similarity with the query, for the *n* most similar rows, in
        descending order of similarity.
    """
    tokens = loaded_tokenizer(
        user_text, return_tensors="pt", padding=True, truncation=True
    )

    # Both calls are idempotent, so repeating them on every request is safe.
    loaded_model.to(device)
    loaded_model.eval()

    with torch.no_grad():
        tokens = {key: value.to(loaded_model.device) for key, value in tokens.items()}
        outputs = loaded_model(**tokens)

    # Mean-pool over the token axis. The original code read an undefined name
    # `output` here, which raised NameError on every call.
    user_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()

    # cosine_similarity needs 2-D inputs; the result has one row per embedding.
    cosine_similarities = cosine_similarity(embeddings, user_embedding.reshape(1, -1))

    df_res = pd.DataFrame(
        cosine_similarities.ravel(), columns=['cos_sim']
    ).sort_values('cos_sim', ascending=False)
    dict_topn = df_res['cos_sim'].iloc[:n].to_dict()
    return dict_topn