File size: 1,368 Bytes
ef6dece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from transformers import BertTokenizer, BertModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import time

# Load the BERT encoder and its tokenizer once, at import time, from the
# 'model' and 'tokenizer' locations respectively.
loaded_model = BertModel.from_pretrained('model')
loaded_tokenizer = BertTokenizer.from_pretrained('tokenizer')
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def filter_by_ganre(df: pd.DataFrame, ganre_list: list) -> pd.DataFrame:
    """Keep the rows of ``df`` whose ``'ganres'`` cell shares a value with ``ganre_list``.

    Args:
        df: Frame with a ``'ganres'`` column. Each cell is iterated, so it
            must hold an iterable (e.g. a list of genre names).
        ganre_list: Genres to look for; a row is kept when at least one of
            its genres is contained in this list.

    Returns:
        The matching rows with their original index and all columns. An empty
        input frame yields an empty frame that still has every column.
    """
    def has_wanted_ganre(row_ganres) -> bool:
        return any(g in ganre_list for g in row_ganres)

    # ``apply`` on an empty column returns a non-boolean Series, which
    # ``df[...]`` would treat as a list of column labels and drop every
    # column. Casting to bool guarantees the Series is used as a row mask.
    mask = df['ganres'].apply(has_wanted_ganre).astype(bool)
    return df[mask]


# NOTE(review): records a timestamp once, when the module is imported. Nothing
# in the visible code reads this module-level value (recommendation() assigns
# its own local of the same name) -- confirm whether it is still needed.
end_time = time.time()


def recommendation(df: pd.DataFrame, embeddings: np.array, user_text: str, n=10) -> dict:
    """Return the ``n`` rows of ``embeddings`` most similar to ``user_text``.

    The text is encoded with the module-level BERT model (mean of the last
    hidden state over the token axis) and compared with every row of
    ``embeddings`` by cosine similarity.

    Args:
        df: Not used by the computation; kept so existing callers keep working.
            NOTE(review): presumably the catalogue whose rows line up with
            ``embeddings`` -- confirm against callers.
        embeddings: 2-D array with one embedding per row; its width must match
            the size of the vector produced for ``user_text``.
        user_text: Query text to embed.
        n: Maximum number of results to return.

    Returns:
        Dict mapping the 0-based row position in ``embeddings`` to its cosine
        similarity with the query, ordered from most to least similar.
    """
    encoded = loaded_tokenizer(user_text, return_tensors="pt", padding=True, truncation=True)
    loaded_model.to(device)
    loaded_model.eval()
    with torch.no_grad():
        # Inputs must live on the same device as the model weights.
        encoded = {key: value.to(loaded_model.device) for key, value in encoded.items()}
        outputs = loaded_model(**encoded)
        # Mean-pool the token vectors into a single embedding for the text.
        user_embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().detach().numpy()
    cosine_similarities = cosine_similarity(embeddings, user_embedding.reshape(1, -1))
    # The frame gets a default 0..N-1 index, so after sorting the index labels
    # are the original row positions in ``embeddings``.
    ranked = pd.DataFrame(cosine_similarities.ravel(), columns=['cos_sim']).sort_values('cos_sim', ascending=False)
    return ranked.iloc[:n, :].cos_sim.to_dict()