# Helper functions for a Streamlit fraud-detection demo: data preparation,
# explanation texts/weights, Deeploy evaluation calls and small UI helpers.
import streamlit.components.v1 as components
import streamlit as st
from random import randrange, uniform
import pandas as pd
import logging
import numpy as np
import random
from datetime import datetime, timedelta
from babel.numbers import format_currency

# Column names for data input: one display name per raw input column, in
# column order (date, type, five amounts/balances, five flags, two IDs).
# NOTE(review): presumably passed as `col_names` to create_data_input_table.
COL_NAMES = [
    "Transaction date",
    "Transaction type",
    "Amount transferred",
    "Sender's initial balance",
    "Sender's new balance",
    "Recipient's initial balance",
    "Recipient's new balance",
    "Sender exactly credited",
    "Receiver exactly credited",
    "Large amount",
    "Frequent receiver",
    "Merchant receiver",
    "Sender ID",
    "Receiver ID",
]

# Texts for explanation, keyed by feature position in the row produced by
# `transformation`: the transaction type (raw index 1) is removed, so keys
# 0-12 are the remaining raw columns and keys 13-17 are the one-hot flags
# appended at the end, in the order of CATEGORIES.
feature_texts = {
    0: "Date of transaction",
    1: "Amount transferred",
    2: "Initial balance of sender",
    3: "New balance of sender",
    4: "Initial balance of recipient",
    5: "New balance of recipient",
    6: "Sender's balance was exactly credited",
    7: "Receiver's balance was exactly credited",
    8: "Large amount",
    9: "Frequent receiver of transactions",
    10: "Receiver is merchant",
    11: "Sender ID",
    12: "Receiver ID",
    13: "Transaction type is Cash out",
    14: "Transaction type is Transfer",
    15: "Transaction type is Payment",
    16: "Transaction type is Cash in",
    17: "Transaction type is Debit",
}

# categories for one hot encoding; the order defines the order of the
# appended one-hot flags (feature_texts keys 13-17)
CATEGORIES = np.array(["CASH_OUT", "TRANSFER", "PAYMENT", "CASH_IN", "DEBIT"])


# one hot encoding
def transformation(input, categories):
    """One-hot encode the transaction type of a single data row.

    The element at index 1 of ``input`` (the transaction type) is removed and
    replaced by one 0/1 flag per entry of ``categories``, appended to the end
    of the row in the order of ``categories``. A type that matches none of
    the categories yields all-zero flags.

    Args:
        input: Row values as a list; index 1 holds the transaction type.
            The list itself is left untouched.
        categories: Array of the known transaction types.

    Returns:
        A new list with the type removed, the one-hot flags appended and
        every NumPy scalar converted to a plain Python object.

    Raises:
        IndexError: If ``input`` has fewer than two elements.
    """
    # Work on a copy so the caller's row keeps its transaction type.
    row = list(input)
    category = np.array(row.pop(1))
    # One flag per category instead of a hard-coded count of five.
    one_hot = np.zeros(len(categories), dtype=int)
    one_hot[np.where(categories == category)[0]] = 1
    row.extend(one_hot.tolist())
    return [
        value.item() if isinstance(value, np.generic) else value for value in row
    ]


# func to make the request body in the right format for the client
def get_request_body(datapoint):
    """Build the request payload from the first row of ``datapoint``.

    Args:
        datapoint: DataFrame-like object; only ``datapoint.iloc[0]`` is read.

    Returns:
        ``{"instances": [row]}`` where ``row`` is the list of values of the
        first row, with ``np.int32``/``np.int64`` values cast to ``int``.
    """
    first_row = datapoint.iloc[0].tolist()
    instance = []
    for value in first_row:
        # NumPy integers become plain ints (presumably so the body can be
        # serialised); every other value is passed through unchanged.
        if isinstance(value, (np.int32, np.int64)):
            value = int(value)
        instance.append(value)
    return {"instances": [instance]}


# func for sorting and retrieving the explanation texts
def get_explainability_texts(shap_values, feature_texts):
    """Pick the explanation texts for the positively contributing features.

    Only strictly positive SHAP values are considered. They are ranked from
    largest to smallest; the two highest-ranked features are skipped (the
    reason is not visible in this function) and at most the next five are
    returned.

    Args:
        shap_values: Iterable of SHAP values, one per feature position.
        feature_texts: Mapping from feature position to explanation text.

    Returns:
        A tuple ``(texts, indices)`` of equal-length lists: the selected
        explanation texts and their feature positions, in ranking order.
    """
    ranked = [(idx, val) for idx, val in enumerate(shap_values) if val > 0]
    # Stable sort: features with equal values keep their positional order.
    ranked.sort(key=lambda pair: abs(pair[1]), reverse=True)
    ranked_indices = [idx for idx, _ in ranked]
    ranked_texts = [feature_texts[idx] for idx in ranked_indices]
    # Skip ranks 1-2, keep ranks 3-7.
    return ranked_texts[2:7], ranked_indices[2:7]


# func to generate random date from the past year to replace var "steps" with
# in the input data, to make it more understandable
def random_past_date_from_last_year():
    """Return a random date within the last 365 days as ``YYYY-MM-DD``.

    The date is drawn uniformly (in whole days) between 365 days ago and
    today, both ends included, using the module-level ``random`` generator.
    """
    now = datetime.now()
    window_start = now - timedelta(days=365)
    day_offset = random.randint(0, (now - window_start).days)
    picked = window_start + timedelta(days=day_offset)
    return picked.strftime("%Y-%m-%d")


# func for retrieving the values for explanations, requires some data engineering
def get_explainability_values(pos_indices, data):
    """Look up the display value of each selected feature.

    The row is rounded (floats to two decimals) and one-hot encoded with
    ``transformation`` so that positions match the feature indices. Features
    at positions 6-10 and 13-17 are shown as the strings ``"True"`` or
    ``"False"``; all other positions return the encoded value itself.

    Args:
        pos_indices: Feature positions to look up.
        data: Raw row values, with the transaction type at index 1.

    Returns:
        A list with one display value per entry of ``pos_indices``.
    """
    rounded_row = [round(v, 2) if isinstance(v, float) else v for v in data]
    encoded_row = transformation(input=rounded_row, categories=CATEGORIES)
    # Positions holding 0/1 flags: the boolean columns and the one-hot block.
    flag_positions = (*range(6, 11), *range(13, 18))
    return [
        str(bool(encoded_row[idx])) if idx in flag_positions else encoded_row[idx]
        for idx in pos_indices
    ]


# func to modify the values of currency to make it more similar to euro
def modify_datapoint(
    datapoint,
):
    """Turn the first row of ``datapoint`` into a display-ready list.

    Index 0 is replaced by a random date from the last year. The five
    amounts at indices 2-6 are scaled down (divided by 100 if any of them
    exceeds 12000, then by 10 if any still exceeds 120000), floats are
    rounded to two decimals and the amounts are formatted as EUR strings.

    Args:
        datapoint: DataFrame-like object; only ``datapoint.iloc[0]`` is read.

    Returns:
        A list with the same length as the row, holding the date string,
        the formatted amounts and the remaining values.
    """
    row = datapoint.iloc[0].tolist()
    row[0] = random_past_date_from_last_year()

    amounts = row[2:7]
    # Zero stays the integer 0 instead of becoming 0.0 after the division.
    if any(amount > 12000 for amount in amounts):
        amounts = [amount / 100 if amount != 0 else 0 for amount in amounts]
    if any(amount > 120000 for amount in amounts):
        amounts = [amount / 10 if amount != 0 else 0 for amount in amounts]

    result = row.copy()
    result[2:7] = amounts
    result = [round(v, 2) if isinstance(v, float) else v for v in result]
    result[2:7] = [
        format_currency(amount, "EUR", locale="en_GB") for amount in result[2:7]
    ]
    return result


# func to retrieve the weights of the features to be presented as explanation
def get_weights(shap_values, sorted_indices, target_sum=0.95):
    """Scale the selected SHAP values so that they add up to ``target_sum``.

    Args:
        shap_values: Indexable collection of SHAP values.
        sorted_indices: Positions in ``shap_values`` of the features to show.
        target_sum: Total the returned weights should add up to.

    Returns:
        A list with one scaled weight per entry of ``sorted_indices``, in the
        same order. If the selected values sum to zero there is nothing to
        scale by, so a list of ``0.0`` of the same length is returned.
    """
    weights = [shap_values[x] for x in sorted_indices]
    total_sum = sum(weights)
    if total_sum == 0:
        # Avoid dividing by zero (no selection, all zeros, or values that
        # cancel each other out).
        return [0.0] * len(weights)
    scale = target_sum / total_sum
    return [val * scale for val in weights]


# func to generate a fake certainty for the model to make it more realistic
def get_fake_certainty():
    """Return a made-up model certainty as a percentage string.

    The value is drawn uniformly between 0.75 and 0.99 and formatted with
    two decimals, e.g. ``"87.31%"``.
    """
    return f"{uniform(0.75, 0.99):.2%}"


# func to get a datapoint marked as fraud in the dataset to be passed to the model
def get_random_suspicious_transaction(data):
    """Pick one random transaction that is marked as fraud.

    Args:
        data: DataFrame with an ``isFraud`` column; rows where it equals 1
            are the candidates.

    Returns:
        A one-row DataFrame holding the picked transaction, without the
        ``isFraud`` column.

    Raises:
        ValueError: If ``data`` contains no row with ``isFraud == 1``.
    """
    suspicious_data = data[data["isFraud"] == 1]
    max_n = len(suspicious_data)
    if max_n == 0:
        raise ValueError("No transactions marked as fraud (isFraud == 1) in data")
    # randrange yields 0 .. max_n - 1, so slice [pos, pos + 1) by position;
    # this always selects exactly one row and every row can be drawn.
    position = randrange(max_n)
    return suspicious_data.iloc[position : position + 1].drop("isFraud", axis=1)


# func to send the evaluation to Deeploy
def send_evaluation(
    client, deployment_id, request_log_id, prediction_log_id, evaluation_input
):
    """Send evaluation to Deeploy.

    Shows a spinner while ``client.evaluate`` runs. Any exception is logged
    and reported in the Streamlit UI instead of being raised.

    Args:
        client: Object exposing ``evaluate(deployment_id, prediction_log_id,
            evaluation_input)`` (presumably the Deeploy client).
        deployment_id: Forwarded to ``client.evaluate``.
        request_log_id: Not used by this function; kept in the signature for
            existing callers.
        prediction_log_id: Forwarded to ``client.evaluate``.
        evaluation_input: Forwarded to ``client.evaluate``.

    Returns:
        ``True`` when the evaluation was submitted, ``None`` when it failed.
    """
    try:
        with st.spinner("Submitting response..."):
            # Submit the evaluation for the given prediction log
            client.evaluate(deployment_id, prediction_log_id, evaluation_input)
    except Exception as e:
        # UI boundary: report the failure to the user rather than crash.
        logging.error(e)
        st.error(
            "Failed to submit feedback. "
            "Check whether you are using the right model URL and Token. "
            "Contact Deeploy if the problem persists."
        )
        st.write(f"Error message: {e}")
        return None
    return True

# func to retrieve model url and important vars for Deeploy client
def get_model_url():
    """Ask for the model URL and extract workspace and deployment id from it.

    The URL is read from a Streamlit text area prefilled with the demo
    deployment. It is split on ``/``; the fifth and seventh parts are taken
    as workspace id and deployment id. If the URL has too few parts for
    both, both ids are returned as empty strings.

    Returns:
        A tuple ``(model_url, workspace_id, deployment_id)``.
    """
    model_url = st.text_area(
        "Model URL (default is the demo deployment)",
        "https://api.app.deeploy.ml/workspaces/708b5808-27af-461a-8ee5-80add68384c7/deployments/ac56dbdf-ba04-462f-aa70-5a0d18698e42/",
        height=125,
    )
    parts = model_url.split("/")
    # Both ids are only available when index 6 exists.
    if len(parts) > 6:
        workspace_id, deployment_id = parts[4], parts[6]
    else:
        workspace_id = deployment_id = ""
    return model_url, workspace_id, deployment_id


# func to create the prefilled text for the disagree button
def get_comment_explanation(certainty, explainability_texts, explainability_values):
    """Compose the prefilled comment from the certainty and the explanations.

    Args:
        certainty: Model certainty, inserted into the first line as-is.
        explainability_texts: Feature descriptions; colons are stripped.
        explainability_values: Feature values, paired with the texts by
            position.

    Returns:
        A multi-line string: the certainty line, an empty line, a heading and
        one ``"<text> is <value>"`` line per value.
    """
    labels = [text.replace(":", "") for text in explainability_texts]
    lines = ["Important suspicious features: "]
    for position, value in enumerate(explainability_values):
        lines.append(f"{labels[position]} is {value}")
    header = f"Model certainty is {certainty}"
    return header + "\n\n" + "\n".join(lines)


# func to create the data input table
def create_data_input_table(data, col_names):
    """Render the "Transaction details" table in the Streamlit app.

    Args:
        data: List of row values. Entries 7-11 are converted to ``bool``
            in place, so the caller's list is modified.
        col_names: Feature names shown next to the values; must have the
            same length as ``data``.
    """
    st.subheader("Transaction details")
    # Written back into `data` itself: the flag columns are shown as booleans.
    data[7:12] = list(map(bool, data[7:12]))
    display_values = []
    for value in data:
        display_values.append(round(value, 2) if isinstance(value, float) else value)
    table = pd.DataFrame({"Feature name": col_names, "Value": display_values})
    # Height grows with the number of rows (presumably so no scrollbar shows).
    table_height = 35 * len(table) + 38
    st.dataframe(table, hide_index=True, width=475, height=table_height)

# func to create the explanation table
def create_table(texts, values, weights, title):
    """Render the explanation table with a heading in the Streamlit app.

    Args:
        texts: Feature explanations (first column).
        values: Feature values (second column).
        weights: Feature weights, shown as progress bars from 0 to 1.
        title: Heading rendered above the table.
    """
    weight_column = st.column_config.ProgressColumn(
        "Weight", width="small", format="%.2f", min_value=0, max_value=1
    )
    table = pd.DataFrame(
        {"Feature Explanation": texts, "Value": values, "Weight": weights}
    )
    st.markdown(f"#### {title}")  # Markdown for styling
    st.dataframe(
        table,
        hide_index=True,
        width=475,
        column_config={"Weight": weight_column},
    )


# func to change button colors
def ChangeButtonColour(widget_label, font_color, background_color="transparent"):
    """Recolour every button whose text equals ``widget_label``.

    Injects a zero-size HTML component whose script walks all ``button``
    elements of the parent document and sets text colour and background on
    those whose ``innerText`` matches the label.

    Args:
        widget_label: Exact button text to match.
        font_color: CSS colour for the button text.
        background_color: CSS background for the button.
    """
    import json  # local import: only this function needs it

    def _js_string(value):
        # A JSON string is a valid JavaScript string literal, so quotes and
        # backslashes in the value (e.g. "Don't agree") can no longer break
        # the script. "</" is escaped so the value cannot close <script>.
        return json.dumps(str(value)).replace("</", "<\\/")

    htmlstr = f"""
        <script>
            var elements = window.parent.document.querySelectorAll('button');
            for (var i = 0; i < elements.length; ++i) {{
                if (elements[i].innerText == {_js_string(widget_label)}) {{
                    elements[i].style.color = {_js_string(font_color)};
                    elements[i].style.background = {_js_string(background_color)};
                }}
            }}
        </script>
        """
    components.html(htmlstr, height=0, width=0)