File size: 4,300 Bytes
b826d7a
 
 
 
 
 
 
 
 
 
 
39bb071
b826d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c3c7e9
 
39bb071
9c3c7e9
 
b826d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c3c7e9
b826d7a
 
9c3c7e9
 
 
 
b826d7a
 
9c3c7e9
b826d7a
48b0e4b
 
 
 
 
 
 
 
 
 
 
b826d7a
48b0e4b
b826d7a
 
99c7f3b
 
1c645c1
b826d7a
 
1c645c1
b826d7a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import pandas as pd
import torch

from sklearn.neighbors import KNeighborsClassifier

class KnnCBF:
    """Content-based recommender built on a k-nearest-neighbours search.

    Items the active user interacted with are used as the fitted set; every
    other item is scored from the interactions of its ``nearest_k`` closest
    fitted items in feature space.
    """

    def __init__(self, items,
                 user_col='user_id',
                 item_col='app_id',
                 score_col='is_recommended',
                 nearest_k=2,
                 metric="cosine"):
        """
        Args:
            items: (DataFrame) games dataframe containing the item column and
                the feature (tag) columns
            user_col: (String) column name of users column
            item_col: (String) column name of items column
            score_col: (String) column name of interactions column
            nearest_k: (Integer) number of nearest interacted items used for
                similarity
            metric: (String) distance metric handed to KNeighborsClassifier
        """
        self.user_col = user_col
        self.item_col = item_col
        self.score_col = score_col
        self.nearest_k = nearest_k
        self.metric = metric

        self.user_id_col = user_col + "_index"
        self.item_id_col = item_col + "_index"

        # Two-column frame: original item id -> integer item code.
        self.item_lookup = self.generate_label(items, self.item_col)

        # Reverse lookup: integer item code -> original item id.
        self.item_map = dict(zip(
            self.item_lookup[self.item_id_col].tolist(),
            self.item_lookup[self.item_col].tolist(),
        ))

        # Creating similarity items
        items = items.merge(self.item_lookup, on=[self.item_col], sort=False)
        # NOTE(review): the first two columns are dropped by position;
        # presumably they are non-feature columns (e.g. the raw item id) --
        # confirm against the schema of the items file.
        items = items.drop(items.columns[:2], axis=1)

        # Reindexing items dataframe: move the item-code column (appended
        # last by the merge) to the front so features start at column 1.
        cols = list(items.columns)
        items = items[cols[-1:] + cols[:-1]]

        self.items = items

    def generate_label(self, df, col):
        """Return the distinct values of ``col`` with an integer code column.

        Args:
            df: (DataFrame) frame containing ``col``
            col: (String) column to encode

        Returns:
            DataFrame with columns ``col`` and ``col + "_index"``, where the
            latter holds the pandas categorical codes of the distinct values.
        """
        dist_labels = df[[col]].drop_duplicates()
        dist_labels[col + "_index"] = dist_labels[col].astype("category").cat.codes

        return dist_labels

    def classifier_fit(self, X, y, test):
        """Fit a KNeighborsClassifier on ``X``/``y`` and query it with ``test``.

        Returns:
            The result of ``kneighbors(test)``: the distances to, and the
            row positions (within ``X``) of, the nearest fitted samples.
        """
        classifier = KNeighborsClassifier(n_neighbors=self.nearest_k, metric=self.metric)
        classifier.fit(X, y)

        return classifier.kneighbors(test)

    def predict_active(self, pred_df,
                       k=10,
                       weight_hybrid=.2,
                       hybrid_model=True):
        """Recommend up to ``k`` items not present in ``pred_df``.

        All rows of ``pred_df`` are treated as the interactions of a single
        active profile.

        Args:
            pred_df: (DataFrame) interactions; must contain the item column
                and the score column
            k: (Integer) maximum number of recommendations; clamped to the
                number of candidate (non-interacted) items
            weight_hybrid: (Float) factor applied to the scores when
                ``hybrid_model`` is true
            hybrid_model: (Boolean) whether to scale scores by
                ``weight_hybrid``

        Returns:
            DataFrame with columns ``'app_id'`` and ``'predicted_score'``,
            ordered by descending score. If the neighbour search raises a
            ``ValueError`` (e.g. fewer interacted items than ``nearest_k``),
            that exception object is returned instead of being raised.
        """
        act_df = pred_df.merge(self.item_lookup, on=[self.item_col], sort=False)
        act_df = act_df[[self.item_id_col, self.score_col]]

        # Split the catalogue into interacted (fitted) and candidate items.
        active_items = self.items.merge(act_df, on=[self.item_id_col], sort=False)
        seen_mask = self.items[self.item_id_col].isin(act_df[self.item_id_col])
        inactive_items = self.items[~seen_mask]

        # Fitting using Features: column 0 is the item code, the last column
        # of active_items is the interaction score added by the merge.
        X = active_items.iloc[:, 1:-1]
        y = active_items.iloc[:, -1]
        test = inactive_items.iloc[:, 1:]

        try:
            distances, neighbours = self.classifier_fit(X, y, test)
        except ValueError as err:
            # Kept for backward compatibility: callers receive the error
            # object rather than an exception.
            return err

        # Neighbour indices are row positions in X, so index positionally.
        rating = y.to_numpy()[neighbours]
        # NOTE(review): the score weights each neighbour's rating by its
        # *distance*, so farther neighbours contribute more -- confirm this
        # is intended rather than a similarity weighting.
        result = np.sum(rating * distances, axis=1) / self.nearest_k

        self.preds_tensor_ = result

        # topk raises if asked for more entries than exist.
        top_tensor = torch.from_numpy(result).topk(min(k, result.shape[0]))
        positions = top_tensor.indices.tolist()
        score = top_tensor.values
        if hybrid_model:
            score = score * weight_hybrid

        # topk positions refer to rows of inactive_items; translate them to
        # item codes before looking up the original item ids.
        candidate_codes = inactive_items[self.item_id_col].to_numpy()[positions].tolist()
        predicted_ids = [self.item_map[code] for code in candidate_codes]

        pred_result = pd.DataFrame({
            'app_id': predicted_ids,
            'predicted_score': score.tolist()
        })

        return pred_result
    
def cbf_model(pred_df, k=10, items_path="data/all_games_attributes.csv"):
    """Load the item attributes and return content-based recommendations.

    Args:
        pred_df: (DataFrame) interactions forwarded to
            ``KnnCBF.predict_active``
        k: (Integer) number of recommendations requested
        items_path: (String) CSV file with the item attributes; defaults to
            the bundled dataset path

    Returns:
        Whatever ``KnnCBF.predict_active`` returns for ``pred_df``.
    """
    items = pd.read_csv(items_path)

    cbf = KnnCBF(items)
    return cbf.predict_active(pred_df=pred_df, k=k)