Spaces:
Sleeping
Sleeping
File size: 4,587 Bytes
00e640a 9c3c7e9 00e640a b826d7a 00e640a 9c3c7e9 00e640a 9c3c7e9 00e640a 9c3c7e9 b01c3d3 00e640a 9c3c7e9 00e640a 9c3c7e9 00e640a b826d7a 00e640a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import pandas as pd
import torch
# sparse_indices = torch.load("data/sparse_indices.pt")
# sparse_values = torch.load("data/sparse_values.pt")
class EASE:
    """Inference wrapper around a pre-trained EASE item-item weight matrix.

    The class keeps the training interactions and an item <-> index lookup so
    that interactions of new ("active") users can be turned into rows of a
    user-item matrix and scored against the weight matrix.
    """

    def __init__(self, train,
                 user_col='user_id',
                 item_col='app_id',
                 score_col='is_recommended',
                 reg=250.):
        """
        train: (DataFrame) data of training set
        user_col: (String) column name of users column
        item_col: (String) column name of items column
        score_col: (String) column name of interactions column
        reg: (Float) EASE's regularization value (stored only; the methods
            of this class do not read it)
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.train = train
        self.reg = reg
        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"
        self.item_lookup = self.generate_label(train, self.item_col)
        # Reverse mapping: integer item index -> original item identifier.
        self.item_map = dict(zip(
            self.item_lookup[self.item_id_col].tolist(),
            self.item_lookup[self.item_col].tolist(),
        ))

    def generate_label(self, df, col):
        """Return the distinct values of ``col`` with an integer code column.

        The result has two columns: ``col`` and ``col + "_index"``, where the
        index is the pandas category code of the value.
        """
        distinct = df[[col]].drop_duplicates()
        codes = distinct[col].astype("category").cat.codes
        return distinct.assign(**{col + "_index": codes})

    def predict_active(self, pred_df,
                       weight_mx,
                       k=10,
                       weight_lambda=0.7,
                       hybrid_model=True,
                       remove_owned=True):
        """Score items for the users found in ``pred_df``.

        Args:
            pred_df: (DataFrame) data of user interactions; must contain the
                user, item and score columns given at construction time
            weight_mx: (Tensor) weight matrix of pre-trained EASE model
            k: (Integer) number of recommendations per user; clamped to the
                number of known items
            weight_lambda: (Float) factor applied to the scores when
                ``hybrid_model`` is true
            hybrid_model: (Boolean) whether to multiply the scores by
                ``weight_lambda`` (presumably so the caller can blend them
                with another model's scores - confirm against the caller)
            remove_owned: (Boolean) whether to subtract the user's own
                interaction scores from the predictions

        Returns:
            DataFrame with one row per (user, recommended item): the item
            column (named after ``item_col``) and ``predicted_score``. Rows
            are grouped per user in the order users appear in ``pred_df``.
        """
        train = pd.concat([self.train, pred_df], axis=0)
        user_lookup = self.generate_label(train, self.user_col)
        train = train.merge(user_lookup, on=[self.user_col], sort=False)
        # Inner merge: interactions with items unseen in training are dropped.
        train = train.merge(self.item_lookup, on=[self.item_col], sort=False)

        active_users = pred_df[[self.user_col]].drop_duplicates()
        active_users = active_users.merge(
            user_lookup, on=[self.user_col], sort=False
        )

        indices = torch.tensor(
            train[[self.user_id_col, self.item_id_col]].to_numpy(dtype="int64"),
            dtype=torch.long,
        )
        values = torch.tensor(
            train[self.score_col].to_numpy(dtype="float32"),
            dtype=torch.float32,
        )
        # Explicit size so that users/items without any surviving row still
        # get an (all-zero) row/column instead of an out-of-range index.
        interactions = torch.sparse_coo_tensor(
            indices.t(),
            values,
            size=(len(user_lookup), len(self.item_lookup)),
        ).coalesce()

        user_index = torch.tensor(
            active_users[self.user_id_col].to_numpy(dtype="int64"),
            dtype=torch.long,
        )
        user_rows = interactions.index_select(dim=0, index=user_index)

        scores = user_rows @ weight_mx
        if remove_owned:
            # NOTE: this subtracts the interaction score, so it penalises
            # items the user scored positively; it is not a strict filter
            # (items with a score of 0 are left unchanged).
            scores = scores - user_rows.to_dense()
        if hybrid_model:
            scores = scores * weight_lambda

        # topk raises when k exceeds the number of columns.
        k = min(k, scores.shape[1])
        top = scores.topk(k, dim=1)

        # Row-major flattening keeps each user's k items contiguous.
        item_ids = [
            self.item_map[item_index]
            for item_index in top.indices.flatten().tolist()
        ]
        item_scores = top.values.flatten().tolist()

        return pd.DataFrame({
            self.item_col: item_ids,
            'predicted_score': item_scores,
        })
def ease_model(pred_df, k=10,
               weights_path="data/ease_B.pt",
               train_path="data/recs.csv"):
    """Load the stored EASE artefacts and recommend items for ``pred_df``.

    Args:
        pred_df: (DataFrame) interactions of the users to score; passed
            unchanged to ``EASE.predict_active``
        k: (Integer) number of recommendations per user
        weights_path: (String) file holding the pre-trained weight matrix
        train_path: (String) CSV file holding the training interactions

    Returns:
        The DataFrame produced by ``EASE.predict_active``.
    """
    # NOTE(security): torch.load unpickles the file; only point this at
    # trusted artefacts.
    ease_B = torch.load(weights_path)
    train = pd.read_csv(train_path)
    ease = EASE(train)
    return ease.predict_active(pred_df=pred_df, weight_mx=ease_B, k=k)
# def main():
# pass
# # act_user = pd.DataFrame({
# # 'user_id': [999999, 999999, 999999, 999999, 999999, 999999],
# # 'app_id': [1689910, 1245620, 814380, 620980, 1551360, 774171],
# # 'is_recommended': [0, 1, 1, 0, 1, 1]
# # })
# # act_indices = torch.FloatTensor(ac)
# # print(
# # torch.sparse.FloatTensor(sparse_indices.T, sparse_values)
# # )
# if __name__ == '__main__':
# main() |