Spaces:
Sleeping
Sleeping
File size: 4,300 Bytes
b826d7a 39bb071 b826d7a 9c3c7e9 39bb071 9c3c7e9 b826d7a 9c3c7e9 b826d7a 9c3c7e9 b826d7a 9c3c7e9 b826d7a 48b0e4b b826d7a 48b0e4b b826d7a 99c7f3b 1c645c1 b826d7a 1c645c1 b826d7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KNeighborsClassifier
class KnnCBF:
    """Content-based recommender driven by a k-nearest-neighbours search.

    Items the active user has not interacted with are scored from the
    interaction values of their nearest interacted items in feature space.
    """

    def __init__(self, items,
                 user_col='user_id',
                 item_col='app_id',
                 score_col='is_recommended',
                 nearest_k=2,
                 metric="cosine"):
        """
        Args:
            items: (DataFrame) games dataframe containing the item column and the tag/feature attributes
            user_col: (String) column name of users column
            item_col: (String) column name of items column
            score_col: (String) column name of interactions column
            nearest_k: (Integer) number of nearest interacted items for similarity
            metric: (String) distance metric handed to KNeighborsClassifier
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.nearest_k = nearest_k
        self.metric = metric
        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"

        # One row per distinct item: (item, item_index).
        self.item_lookup = self.generate_label(items, self.item_col)
        # Reverse lookup: item_index -> original item identifier.
        self.item_map = {
            item_index: item for item, item_index in self.item_lookup.values
        }

        # Creating similarity items
        items = items.merge(self.item_lookup, on=[self.item_col], sort=False)
        # The first two columns are dropped by position.
        # NOTE(review): presumably the raw item id plus one descriptive
        # column (e.g. a title) - confirm against the CSV layout.
        items = items.drop(items.columns[:2], axis=1)
        # Reindexing items dataframe: move the index column (appended last by
        # the merge) to the front so features occupy columns 1..n.
        cols = list(items.columns)
        items = items[cols[-1:] + cols[:-1]]
        self.items = items

    def generate_label(self, df, col):
        """Return the distinct values of `col` with an integer code column.

        Args:
            df: (DataFrame) frame containing `col`
            col: (String) column to encode

        Returns:
            DataFrame with columns `col` and `col + "_index"`, where the index
            is the pandas category code of the value.
        """
        # Explicit copy so the column assignment never targets a view.
        dist_labels = df[[col]].drop_duplicates().copy()
        dist_labels[col + "_index"] = dist_labels[col].astype("category").cat.codes
        return dist_labels

    def classifier_fit(self, X, y, test):
        """Fit a KNN model on (X, y) and look up the neighbours of `test`.

        Returns:
            Whatever `KNeighborsClassifier.kneighbors` returns; the caller
            uses element 0 as distances and element 1 as row positions in X.
        """
        classifier = KNeighborsClassifier(n_neighbors=self.nearest_k, metric=self.metric)
        classifier.fit(X, y)
        return classifier.kneighbors(test)

    def predict_active(self, pred_df,
                       k=10,
                       weight_hybrid=.2,
                       hybrid_model=True):
        """Recommend up to `k` not-yet-interacted items for one active user.

        Args:
            pred_df: (DataFrame) interactions of a single user; must contain
                the user, item and score columns configured on the instance
            k: (Integer) maximum number of items to return; clamped to the
                number of candidate items
            weight_hybrid: (Float) multiplier applied to scores when
                `hybrid_model` is true
            hybrid_model: (Boolean) whether to scale scores by `weight_hybrid`

        Returns:
            DataFrame with columns 'app_id' and 'predicted_score', best first.
            If the neighbour search raises ValueError (e.g. no interacted or
            no candidate items), the exception instance is RETURNED rather
            than raised; callers must check for it.

        Raises:
            ValueError: if `pred_df` holds more than one distinct user.
        """
        # Previously multi-user input died later with an opaque pandas
        # length-mismatch ValueError; keep the exception type, make it explicit.
        if len(pred_df[[self.user_col]].drop_duplicates()) > 1:
            raise ValueError("predict_active expects the interactions of a single user")

        act_df = pred_df.merge(self.item_lookup, on=[self.item_col], sort=False)
        act_df = act_df[[self.item_id_col, self.score_col]]

        # Split the catalogue into items the user interacted with and the rest.
        active_items = self.items.merge(act_df, on=[self.item_id_col], sort=False)
        seen = self.items[self.item_id_col].isin(act_df[self.item_id_col])
        inactive_items = self.items[~seen]

        # Fitting using Features: column 0 is the item index, the last column
        # of active_items is the interaction score added by the merge.
        X = active_items.iloc[:, 1:-1]
        y = active_items.iloc[:, -1]
        test = inactive_items.iloc[:, 1:]
        try:
            output = self.classifier_fit(X, y, test)
        except ValueError as err:
            # Legacy contract: hand the error back to the caller.
            return err
        distances, neighbor_idx = output[0], output[1]

        # Neighbour indices are row positions in X, so index positionally.
        rating = y.to_numpy()[neighbor_idx]
        # NOTE(review): the score is rating * distance averaged over the
        # neighbours, so farther neighbours contribute more - confirm intent.
        result = np.sum(rating * distances, axis=1) / self.nearest_k
        self.preds_tensor_ = result

        # topk raises when k exceeds the number of candidates; clamp instead.
        top_tensor = torch.from_numpy(result).topk(min(k, len(result)))
        # topk positions refer to rows of inactive_items; translate them to
        # item indexes before consulting item_map (which is keyed by index).
        candidate_codes = inactive_items[self.item_id_col].to_numpy()
        predicted_items = [
            self.item_map[int(candidate_codes[pos])]
            for pos in top_tensor.indices.tolist()
        ]

        score = top_tensor.values
        if hybrid_model:
            score = score * weight_hybrid

        # Output column name kept as the literal 'app_id' for existing callers.
        pred_result = pd.DataFrame({
            'app_id': predicted_items,
            'predicted_score': score.tolist()
        })
        return pred_result
def cbf_model(pred_df, k=10, items_path="data/all_games_attributes.csv"):
    """Run the content-based KNN recommender for one user's interactions.

    Args:
        pred_df: (DataFrame) interactions passed straight to
            `KnnCBF.predict_active`
        k: (Integer) number of items to recommend
        items_path: (String) CSV file with the game attributes; defaults to
            the path this function has always read

    Returns:
        Whatever `KnnCBF.predict_active` returns for `pred_df` and `k`.
    """
    # The attribute table is re-read and the model rebuilt on every call.
    items = pd.read_csv(items_path)
    cbf = KnnCBF(items)
    return cbf.predict_active(pred_df=pred_df, k=k)
|