"""Inference helpers for a pre-trained EASE item-item recommender."""
import logging

import pandas as pd
import torch

logger = logging.getLogger(__name__)

# sparse_indices = torch.load("data/sparse_indices.pt")
# sparse_values = torch.load("data/sparse_values.pt")


class EASE:
    """Produce top-k item recommendations from a pre-trained weight matrix.

    The class keeps the training interactions and an item <-> index lookup
    built from them. Scoring multiplies a user's interaction row by an
    externally supplied item-item weight matrix (see ``predict_active``).
    """

    def __init__(self, train, user_col='user_id', item_col='app_id',
                 score_col='is_recommended', reg=150.):
        """Store the training data and build the item index lookup.

        Args:
            train: (DataFrame) training interactions.
            user_col: (str) name of the user column.
            item_col: (str) name of the item column.
            score_col: (str) name of the interaction-score column.
            reg: (float) EASE regularization value. Only stored here; nothing
                in this module reads it.
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.train = train
        self.reg = reg
        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"

        # One row per distinct item, with its integer category code.
        self.item_lookup = self.generate_label(train, self.item_col)
        # Reverse mapping: integer item index -> original item identifier.
        self.item_map = dict(zip(
            self.item_lookup[self.item_id_col].tolist(),
            self.item_lookup[self.item_col].tolist(),
        ))

    def generate_label(self, df, col):
        """Return the distinct values of ``col`` with an integer code column.

        Args:
            df: (DataFrame) frame containing ``col``.
            col: (str) column to label.

        Returns:
            DataFrame with columns ``col`` and ``col + "_index"``, where the
            index is the pandas category code of each distinct value.
        """
        labels = df[[col]].drop_duplicates().copy()
        labels[col + "_index"] = labels[col].astype("category").cat.codes
        return labels

    @staticmethod
    def _min_max_scale(scores, upper=1):
        """Linearly rescale ``scores`` to ``[0, upper]`` using its global min/max.

        A constant tensor has a zero range; it is mapped to all zeros instead
        of dividing by zero (which would fill the result with NaN).
        """
        low = torch.min(scores)
        span = torch.max(scores) - low
        if span == 0:
            return torch.zeros_like(scores)
        return upper * (scores - low) / span

    @staticmethod
    def scaling_score(preds_tensor):
        """Rescale a prediction tensor to the range ``[0, 10]``.

        Declared static because it takes no ``self``; this keeps
        ``EASE.scaling_score(t)`` working and makes ``instance.scaling_score(t)``
        valid as well.

        Args:
            preds_tensor: (Tensor) raw prediction scores.

        Returns:
            Tensor of the same shape, min-max scaled to ``[0, 10]`` (all zeros
            when every input value is identical).
        """
        return EASE._min_max_scale(preds_tensor, upper=10)

    def predict_active(self, pred_df, weight_mx, k=10, weight_lambda=0.8,
                       hybrid_model=True, remove_owned=True):
        """Recommend the top-k items for every user present in ``pred_df``.

        Args:
            pred_df: (DataFrame) interactions of the users to score; needs the
                user, item and score columns configured on this instance.
            weight_mx: (Tensor) weight matrix of the pre-trained EASE model.
                It is right-multiplied onto the user x item interaction rows,
                so its first dimension must equal the number of distinct
                training items.
            k: (int) number of recommendations per user. Clamped to the
                number of known items.
            weight_lambda: (float) factor applied to the scores when
                ``hybrid_model`` is true.
            hybrid_model: (bool) whether to multiply scores by
                ``weight_lambda``.
            remove_owned: (bool) whether to subtract each user's own
                interaction scores from the normalised predictions. Only
                interactions with a non-zero score are pushed down by this.

        Returns:
            DataFrame with columns ``'app_id'`` (recommended item identifier)
            and ``'predicted_score'``, holding ``k`` consecutive rows per user
            in the order users first appear in ``pred_df``. Empty when
            ``pred_df`` is empty.
        """
        if pred_df.empty:
            return pd.DataFrame({'app_id': [], 'predicted_score': []})

        interactions = pd.concat([self.train, pred_df], axis=0)
        user_lookup = self.generate_label(interactions, self.user_col)
        interactions = interactions.merge(
            user_lookup, on=[self.user_col], sort=False)
        # Inner merge: interactions with items unseen in training are dropped.
        interactions = interactions.merge(
            self.item_lookup, on=[self.item_col], sort=False)

        active_users = pred_df[[self.user_col]].drop_duplicates()
        active_users = active_users.merge(
            user_lookup, on=[self.user_col], sort=False)

        indices = torch.tensor(
            interactions[[self.user_id_col, self.item_id_col]]
            .to_numpy(dtype="int64")
        )
        values = torch.tensor(
            interactions[self.score_col].to_numpy(dtype="float32")
        )
        # Give the shape explicitly: an inferred shape would lose the row of a
        # user whose interactions were all dropped by the item merge above.
        interaction_mx = torch.sparse_coo_tensor(
            indices.T,
            values,
            size=(len(user_lookup), len(self.item_lookup)),
        ).coalesce()

        user_rows = interaction_mx.index_select(
            dim=0,
            index=torch.tensor(
                active_users[self.user_id_col].to_numpy(dtype="int64")),
        )

        # Normalise with the min/max taken over all scored users together.
        scores = self._min_max_scale(user_rows @ weight_mx)

        if remove_owned:
            scores = scores - user_rows.to_dense()

        if hybrid_model:
            scores = scores * weight_lambda

        # topk raises when k exceeds the number of columns, so clamp it.
        top = scores.topk(min(k, scores.shape[1]), dim=1)

        item_ids = [
            self.item_map[item_index]
            for row in top.indices.tolist()
            for item_index in row
        ]
        item_scores = [
            score for row in top.values.tolist() for score in row
        ]
        return pd.DataFrame({
            'app_id': item_ids,
            'predicted_score': item_scores,
        })


def ease_model(pred_df, k=10):
    """Load the stored weights and training data, then score ``pred_df``.

    Reads ``data/ease_B.pt`` and ``data/recs.csv`` on every call; errors
    while loading those files propagate to the caller.

    Args:
        pred_df: (DataFrame) interactions of the users to score.
        k: (int) number of recommendations per user.

    Returns:
        The DataFrame produced by ``EASE.predict_active``, or ``None`` when
        prediction fails (the failure is logged with its traceback).
    """
    ease_B = torch.load("data/ease_B.pt")
    train = pd.read_csv("data/recs.csv")
    ease = EASE(train)
    try:
        return ease.predict_active(pred_df=pred_df, weight_mx=ease_B, k=k)
    except Exception:
        # Best-effort contract: callers receive None rather than an exception.
        logger.exception("EASE prediction failed")
        return None


# def main():
#     pass
#     # act_user = pd.DataFrame({
#     #     'user_id': [999999, 999999, 999999, 999999, 999999, 999999],
#     #     'app_id': [1689910, 1245620, 814380, 620980, 1551360, 774171],
#     #     'is_recommended': [0, 1, 1, 0, 1, 1]
#     # })
#     # act_indices = torch.FloatTensor(ac)
#     # print(
#     #     torch.sparse.FloatTensor(sparse_indices.T, sparse_values)
#     # )


# if __name__ == '__main__':
#     main()