Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import torch | |
from sklearn.neighbors import KNeighborsClassifier | |
class KnnCBF:
    """K-nearest-neighbour content-based scorer for one active user.

    Items the user has not interacted with are scored from the interaction
    scores of their ``nearest_k`` closest interacted items in feature space.

    Attributes set by ``__init__``:
        item_lookup: DataFrame with one row per distinct item id and its
            integer index code (column ``<item_col>_index``).
        item_map: dict mapping an item index code back to the item id.
        items: feature DataFrame whose first column is the item index code
            and whose remaining columns are used as features.

    Attribute set by ``predict_active``:
        preds_tensor_: NumPy array with the raw score of every candidate
            (not yet interacted) item, in candidate-row order.
    """

    def __init__(self, items,
                 user_col='user_id',
                 item_col='app_id',
                 score_col='is_recommended',
                 nearest_k=2,
                 metric="cosine"):
        """
        Args:
            items: (DataFrame) games dataframe containing the tag attributes
            user_col: (String) column name of users column
            item_col: (String) column name of items column
            score_col: (String) column name of interactions column
            nearest_k: (Integer) number of nearest interacted items for similarity
            metric: (String) distance metric handed to KNeighborsClassifier
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.nearest_k = nearest_k
        self.metric = metric

        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"

        self.item_lookup = self.generate_label(items, self.item_col)
        # Reverse lookup: index code -> original item id.
        self.item_map = dict(zip(
            self.item_lookup[self.item_id_col].tolist(),
            self.item_lookup[self.item_col].tolist(),
        ))

        # Attach the index code to every item row.
        items = items.merge(self.item_lookup, on=[self.item_col], sort=False)
        # NOTE(review): the first two columns are dropped by position;
        # presumably they are the item id and one other non-feature column
        # (e.g. a title) -- confirm against the items file schema.
        items = items.drop(items.columns[:2], axis=1)

        # Move the index code (appended last by the merge) to the front so the
        # feature columns can be sliced as ``iloc[:, 1:]``.
        cols = list(items.columns)
        items = items[cols[-1:] + cols[:-1]]
        self.items = items

    def generate_label(self, df, col):
        """Return the distinct values of ``df[col]`` with an integer code each.

        Args:
            df: (DataFrame) frame containing ``col``
            col: (String) column to encode

        Returns:
            DataFrame with columns ``col`` and ``col + "_index"``, where the
            index is the pandas category code of the value.
        """
        dist_labels = df[[col]].drop_duplicates().copy()
        dist_labels[col + "_index"] = dist_labels[col].astype("category").cat.codes
        return dist_labels

    def classifier_fit(self, X, y, test):
        """Fit a KNeighborsClassifier on ``(X, y)`` and query it with ``test``.

        Returns:
            The ``(distances, indices)`` pair from ``kneighbors(test)``;
            ``indices`` are row positions into ``X``.
        """
        classifier = KNeighborsClassifier(n_neighbors=self.nearest_k, metric=self.metric)
        classifier.fit(X, y)
        return classifier.kneighbors(test)

    def predict_active(self, pred_df,
                       k=10,
                       weight_hybrid=.2,
                       hybrid_model=True):
        """Score the items the active user has not interacted with; return the top ``k``.

        Each candidate's raw score is the sum, over its ``nearest_k`` nearest
        interacted items, of (interaction score * distance), divided by
        ``nearest_k``.
        NOTE(review): the neighbour's score is multiplied by the *distance*,
        so farther neighbours contribute more -- confirm this is intended.

        Args:
            pred_df: (DataFrame) interactions of a single user; must contain
                the user, item and score columns named at construction
            k: (Integer) maximum number of items to return; clamped to the
                number of candidate items
            weight_hybrid: (Float) factor applied to the scores when
                ``hybrid_model`` is true
            hybrid_model: (Boolean) whether to scale scores by ``weight_hybrid``

        Returns:
            DataFrame with columns ``app_id`` and ``predicted_score``, best
            first. If the neighbour search raises ``ValueError`` (for example
            when there are fewer interacted items than ``nearest_k``), that
            exception object is returned instead of being raised.

        Raises:
            ValueError: if ``pred_df`` holds interactions of more than one user.
        """
        if len(pred_df[[self.user_col]].drop_duplicates()) > 1:
            raise ValueError(
                "predict_active expects the interactions of a single user"
            )

        # Translate the user's interacted items into internal index codes.
        act_df = pred_df.merge(self.item_lookup, on=[self.item_col], sort=False)
        act_df = act_df[[self.item_id_col, self.score_col]]

        # Interacted items carry the score as their last column; everything
        # else is a candidate for recommendation.
        active_items = self.items.merge(act_df, on=[self.item_id_col], sort=False)
        seen = self.items[self.item_id_col].isin(act_df[self.item_id_col])
        inactive_items = self.items[~seen]

        X = active_items.iloc[:, 1:-1]
        y = active_items.iloc[:, -1]
        test = inactive_items.iloc[:, 1:]

        try:
            distances, neighbors = self.classifier_fit(X, y, test)
        except ValueError as err:
            # Kept for backward compatibility: callers receive the error object.
            return err

        # ``neighbors`` holds row positions into X / y, so index positionally.
        ratings = y.to_numpy()[neighbors]
        result = np.sum(ratings * distances, axis=1) / self.nearest_k
        self.preds_tensor_ = result

        # topk raises if asked for more entries than exist.
        top = torch.from_numpy(result).topk(min(k, len(result)))
        positions = top.indices.tolist()
        score = top.values

        # ``positions`` are row offsets into the candidate frame; convert them
        # to item index codes before looking up the original item ids.
        candidate_codes = inactive_items[self.item_id_col].tolist()
        predicted_items = [self.item_map[candidate_codes[pos]] for pos in positions]

        if hybrid_model:
            score = score * weight_hybrid

        return pd.DataFrame({
            'app_id': predicted_items,
            'predicted_score': score.tolist(),
        })
def cbf_model(pred_df, k=10, items_path="data/all_games_attributes.csv"):
    """Build a KnnCBF model from an items CSV and predict for ``pred_df``.

    The CSV is read and the model is rebuilt on every call.

    Args:
        pred_df: (DataFrame) interactions passed to ``KnnCBF.predict_active``
        k: (Integer) number of items requested from ``predict_active``
        items_path: (String) path of the items attribute CSV; defaults to the
            file this function originally hard-coded

    Returns:
        Whatever ``KnnCBF.predict_active`` returns for ``pred_df`` and ``k``.
    """
    items = pd.read_csv(items_path)
    cbf = KnnCBF(items)
    return cbf.predict_active(pred_df=pred_df, k=k)