File size: 9,547 Bytes
1794e4f c8a77b2 5810688 c8a77b2 304bada c8a77b2 a22458b c8a77b2 a22458b c8a77b2 a22458b 3c5e633 c8a77b2 a22458b c8a77b2 a22458b c8a77b2 a22458b c8a77b2 85b7407 c8a77b2 1794e4f a22458b c8a77b2 2187b60 3c5e633 2187b60 3c5e633 c8a77b2 a22458b 304bada a22458b 304bada a22458b 304bada a22458b 2187b60 c8a77b2 a22458b 2187b60 c8a77b2 2187b60 c8a77b2 5810688 a22458b 304bada a22458b 304bada a22458b 304bada 3c5e633 a22458b 3c5e633 a22458b 5810688 a22458b c8a77b2 a22458b 85b7407 a22458b 85b7407 5810688 a22458b c8a77b2 a22458b 85b7407 a22458b c8a77b2 a22458b c8a77b2 a22458b 2187b60 a22458b 2187b60 a22458b 304bada 3c5e633 2187b60 a22458b 2187b60 a22458b 3c5e633 a22458b 2187b60 a22458b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
import streamlit.components.v1 as components
import streamlit as st
from random import randrange, uniform
import pandas as pd
import logging
import numpy as np
import random
from datetime import datetime, timedelta
from babel.numbers import format_currency
# Column names for data input.
# Display labels for a single transaction row; the list has 14 entries and
# "Transaction type" sits at index 1.
# NOTE(review): presumably passed as `col_names` to create_data_input_table(),
# which casts positions 7-11 of the data to bool (here the labels from
# "Sender exactly credited" through "Merchant receiver") — confirm at the caller.
COL_NAMES = [
"Transaction date",
"Transaction type",
"Amount transferred",
"Sender's initial balance",
"Sender's new balance",
"Recipient's initial balance",
"Recipient's new balance",
"Sender exactly credited",
"Receiver exactly credited",
"Large amount",
"Frequent receiver",
"Merchant receiver",
"Sender ID",
"Receiver ID",
]
# Texts for explanation, keyed by feature index.
# The keys appear to follow the row layout produced by transformation(): the
# transaction type (index 1 of the raw row) is removed and five one-hot columns
# are appended, so keys 13-17 line up with the order of CATEGORIES
# (CASH_OUT, TRANSFER, PAYMENT, CASH_IN, DEBIT).
# get_explainability_values() renders indices 6-10 and 13-17 as "True"/"False".
feature_texts = {
0: "Date of transaction",
1: "Amount transferred",
2: "Initial balance of sender",
3: "New balance of sender",
4: "Initial balance of recipient",
5: "New balance of recipient",
6: "Sender's balance was exactly credited",
7: "Receiver's balance was exactly credited",
8: "Large amount",
9: "Frequent receiver of transactions",
10: "Receiver is merchant",
11: "Sender ID",
12: "Receiver ID",
13: "Transaction type is Cash out",
14: "Transaction type is Transfer",
15: "Transaction type is Payment",
16: "Transaction type is Cash in",
17: "Transaction type is Debit",
}
# Categories for one-hot encoding of the transaction type.
# The position of a category in this array is the position of its 1 in the
# vector that transformation() appends, so keep the order in sync with
# feature_texts keys 13-17.
CATEGORIES = np.array(["CASH_OUT", "TRANSFER", "PAYMENT", "CASH_IN", "DEBIT"])
# One-hot encode the transaction type of a single data row.
def transformation(input, categories):
    """Replace the transaction type at index 1 with one-hot columns.

    The element at index 1 of ``input`` is removed and one 0/1 value per entry
    of ``categories`` is appended to the end of the row. A 1 marks the category
    equal to the removed element; if nothing matches, all appended values are 0.

    The caller's sequence is not modified; a new list is returned.

    Args:
        input: Sequence of row values with the transaction type at index 1.
            (The name shadows the builtin but is kept because callers pass it
            by keyword.)
        categories: Array-like of category labels; its length determines how
            many one-hot columns are appended.

    Returns:
        A new list with the type removed, the one-hot values appended, and any
        NumPy scalar converted to the equivalent plain Python object.
    """
    # Copy first: the original aliased the argument and mutated it in place.
    row = list(input)
    category = np.array(row.pop(1))

    categories = np.asarray(categories)
    one_hot = np.zeros(len(categories), dtype=int)
    one_hot[np.where(categories == category)[0]] = 1
    row.extend(one_hot.tolist())

    # NumPy scalars are unwrapped so the result holds only native Python values.
    return [
        value.item() if isinstance(value, np.generic) else value for value in row
    ]
# Build the request body for the client from the first row of a DataFrame.
def get_request_body(datapoint):
    """Wrap the first row of ``datapoint`` as ``{"instances": [[...]]}``.

    Values that are ``np.int32`` / ``np.int64`` are converted to plain ``int``;
    every other value is passed through unchanged.

    Args:
        datapoint: DataFrame whose first row (``iloc[0]``) is the instance.

    Returns:
        A dict with a single key ``"instances"`` holding a one-element list
        that contains the row values as a list.
    """
    first_row = datapoint.iloc[0].tolist()
    instance = []
    for value in first_row:
        if isinstance(value, (np.int32, np.int64)):
            value = int(value)
        instance.append(value)
    return {"instances": [instance]}
# Rank the positively contributing features and pick the ones to display.
def get_explainability_texts(shap_values, feature_texts):
    """Return explanation texts and indices for selected positive SHAP values.

    Only strictly positive values are considered. They are ranked from largest
    to smallest, the two highest-ranked entries are skipped, and at most the
    next five are returned.

    Args:
        shap_values: Iterable of numeric contributions, one per feature index.
        feature_texts: Mapping from feature index to its explanation text.

    Returns:
        A tuple ``(texts, indices)`` of equally long lists in ranked order.
    """
    positives = [(idx, val) for idx, val in enumerate(shap_values) if val > 0]
    # Stable descending sort: equal values keep their original relative order.
    positives.sort(key=lambda pair: abs(pair[1]), reverse=True)

    ranked_indices = [idx for idx, _ in positives]
    # Texts are looked up for every positive index before slicing.
    ranked_texts = [feature_texts[idx] for idx in ranked_indices]

    # Skip ranks 0-1, then keep up to five entries (ranks 2-6).
    return ranked_texts[2:7], ranked_indices[2:7]
# Generate a random date from the past year; it replaces the "steps" variable
# in the input data to make it more understandable.
def random_past_date_from_last_year():
    """Return a random date within the last 365 days as ``YYYY-MM-DD``.

    A whole number of days is drawn uniformly and added to the moment exactly
    365 days before now; only the date part of the result is returned.
    """
    window_start = datetime.now() - timedelta(days=365)
    span_in_days = (datetime.now() - window_start).days
    offset = timedelta(days=random.randint(0, span_in_days))
    picked = window_start + offset
    return picked.strftime("%Y-%m-%d")
# Look up the values shown next to each explanation text.
def get_explainability_values(pos_indices, data):
    """Return the display value of every feature index in ``pos_indices``.

    Floats in ``data`` are rounded to two decimals and the row is passed
    through ``transformation`` with ``CATEGORIES``. Indices 6-10 and 13-17 are
    rendered as the strings ``"True"`` / ``"False"``; all other indices yield
    the transformed value unchanged.

    Args:
        pos_indices: Iterable of feature indices into the transformed row.
        data: Raw row values with the transaction type at index 1.

    Returns:
        List of display values, in the order of ``pos_indices``.
    """
    rounded = []
    for value in data:
        rounded.append(round(value, 2) if isinstance(value, float) else value)
    encoded = transformation(input=rounded, categories=CATEGORIES)

    # Index ranges whose values are shown as booleans.
    flag_ranges = (range(6, 11), range(13, 18))

    def display(idx):
        if any(idx in flag_range for flag_range in flag_ranges):
            return str(bool(encoded[idx])).capitalize()
        return encoded[idx]

    return [display(idx) for idx in pos_indices]
# Rescale the currency values so they look more like euro amounts.
def modify_datapoint(
    datapoint,
):  # should return list, with correct numbers/amounts, and date
    """Return the first row of ``datapoint`` as a display-ready list.

    Index 0 is replaced by a random date from the past year. The five values at
    positions 2-6 are divided by 100 when any of them exceeds 12000, and by a
    further 10 when any of them still exceeds 120000 (zeros become the integer
    0). Floats are then rounded to two decimals and positions 2-6 are formatted
    as EUR currency strings using the ``en_GB`` locale.

    Args:
        datapoint: DataFrame whose first row (``iloc[0]``) is the transaction.

    Returns:
        List of row values with a date string at index 0 and currency strings
        at indices 2-6.
    """
    row = datapoint.iloc[0].tolist()
    row[0] = random_past_date_from_last_year()
    amounts = slice(2, 7)

    def shrink(values, divisor):
        # Zero is mapped to the literal integer 0 rather than divided.
        return [value / divisor if value != 0 else 0 for value in values]

    scaled = row.copy()
    if any(value > 12000 for value in row[amounts]):
        scaled[amounts] = shrink(row[amounts], 100)
    if any(value > 120000 for value in scaled[amounts]):
        scaled[amounts] = shrink(scaled[amounts], 10)

    result = [
        round(value, 2) if isinstance(value, float) else value for value in scaled
    ]
    result[amounts] = [
        format_currency(value, "EUR", locale="en_GB") for value in result[amounts]
    ]
    return result
# Retrieve the weights of the features to be presented as explanation.
def get_weights(shap_values, sorted_indices, target_sum=0.95):
    """Scale the selected SHAP values so that they add up to ``target_sum``.

    Args:
        shap_values: Indexable collection of numeric contributions.
        sorted_indices: Indices into ``shap_values`` to select, in output order.
        target_sum: Total the scaled weights should add up to.

    Returns:
        List of scaled weights in the order of ``sorted_indices``. When the
        selected values sum to zero (including an empty selection) no scaling
        is possible, so a list of ``0.0`` of the same length is returned
        instead of raising ``ZeroDivisionError``.
    """
    weights = [shap_values[idx] for idx in sorted_indices]
    total = sum(weights)
    if total == 0:
        return [0.0 for _ in weights]
    # The factor is the same for every weight, so compute it once.
    factor = target_sum / total
    return [weight * factor for weight in weights]
# Generate a fake certainty for the model to make the demo more realistic.
def get_fake_certainty():
    """Return a random certainty between 75% and 99% as a percentage string.

    The value is drawn uniformly from [0.75, 0.99] and formatted with two
    decimals, e.g. ``"87.31%"``.
    """
    return f"{uniform(0.75, 0.99):.2%}"
# Get a datapoint marked as fraud in the dataset to be passed to the model.
def get_random_suspicious_transaction(data):
    """Return one randomly chosen fraud row as a single-row DataFrame.

    Args:
        data: DataFrame with an ``isFraud`` column; rows where it equals 1 are
            the candidates.

    Returns:
        A one-row DataFrame without the ``isFraud`` column.

    Raises:
        ValueError: If ``data`` contains no row with ``isFraud == 1``.
    """
    suspicious = data[data["isFraud"] == 1]
    if suspicious.empty:
        raise ValueError("No transactions marked as fraud (isFraud == 1) in data")

    # randrange yields 0..n-1, so select [position, position + 1). The previous
    # slice [position - 1:position] was empty for position 0 and could never
    # reach the last row.
    position = randrange(len(suspicious))
    return suspicious.iloc[position : position + 1].drop("isFraud", axis=1)
# Send the evaluation to Deeploy.
def send_evaluation(
    client, deployment_id, request_log_id, prediction_log_id, evaluation_input
):
    """Send evaluation to Deeploy.

    Calls ``client.evaluate(deployment_id, prediction_log_id, evaluation_input)``
    while showing a spinner. On failure the error is logged and shown in the
    Streamlit UI instead of being raised.

    Args:
        client: Object exposing an ``evaluate`` method with the signature above.
        deployment_id: Identifier passed through to ``client.evaluate``.
        request_log_id: Not used by this function; kept so existing callers
            keep working.
        prediction_log_id: Identifier passed through to ``client.evaluate``.
        evaluation_input: Evaluation payload passed through to
            ``client.evaluate``.

    Returns:
        True when the evaluation was submitted, False when submission failed.
    """
    try:
        with st.spinner("Submitting response..."):
            client.evaluate(deployment_id, prediction_log_id, evaluation_input)
    except Exception as e:
        # UI boundary: report the failure to the user rather than crash the app.
        logging.exception("Failed to submit evaluation: %s", e)
        st.error(
            "Failed to submit feedback. "
            "Check whether you are using the right model URL and Token. "
            "Contact Deeploy if the problem persists."
        )
        st.write(f"Error message: {e}")
        return False
    return True
# Retrieve the model URL and the ids needed by the Deeploy client.
def get_model_url():
    """Get model url and retrieve workspace id and deployment id from it.

    The URL is read from a Streamlit text area and split on ``/``. The segments
    at positions 4 and 6 are taken as workspace id and deployment id; when the
    URL has too few segments, both ids are empty strings.

    Returns:
        A tuple ``(model_url, workspace_id, deployment_id)``.
    """
    model_url = st.text_area(
        "Model URL (default is the demo deployment)",
        "https://api.app.deeploy.ml/workspaces/708b5808-27af-461a-8ee5-80add68384c7/deployments/ac56dbdf-ba04-462f-aa70-5a0d18698e42/",
        height=125,
    )
    segments = model_url.split("/")
    # Both ids are only valid together: index 6 must exist.
    if len(segments) > 6:
        workspace_id, deployment_id = segments[4], segments[6]
    else:
        workspace_id = deployment_id = ""
    return model_url, workspace_id, deployment_id
# Create the prefilled text for the disagree button.
def get_comment_explanation(certainty, explainability_texts, explainability_values):
    """Build the comment listing model certainty and the suspicious features.

    Colons are removed from every explanation text. One line
    ``"<text> is <value>"`` is produced per entry of ``explainability_values``,
    paired with the text at the same position.

    Args:
        certainty: Certainty text inserted into the first line.
        explainability_texts: Sequence of feature explanation strings.
        explainability_values: Sequence of values, one per explanation text.

    Returns:
        The multi-line comment string.
    """
    labels = [text.replace(":", "") for text in explainability_texts]
    lines = ["Important suspicious features: "]
    for position, value in enumerate(explainability_values):
        lines.append(f"{labels[position]} is {value}")
    return f"Model certainty is {certainty}" + "\n\n" + "\n".join(lines)
# Create the data input table.
def create_data_input_table(data, col_names):
    """Render the "Transaction details" table in Streamlit.

    Positions 7-11 of ``data`` are converted to ``bool`` in place, so the
    caller's list is modified. Floats are rounded to two decimals for display
    only.

    Args:
        data: Mutable list of row values, one per entry in ``col_names``.
        col_names: Labels shown in the "Feature name" column.
    """
    st.subheader("Transaction details")

    flag_positions = slice(7, 12)
    data[flag_positions] = list(map(bool, data[flag_positions]))

    display_values = []
    for value in data:
        display_values.append(round(value, 2) if isinstance(value, float) else value)

    table = pd.DataFrame({"Feature name": col_names, "Value": display_values})
    # Height grows with the number of rows (35 per row plus 38).
    table_height = 35 * len(table) + 38
    st.dataframe(
        table, hide_index=True, width=475, height=table_height
    )  # use_container_width=True
# Create the explanation table.
def create_table(texts, values, weights, title):
    """Render a titled explanation table in Streamlit.

    Args:
        texts: Values for the "Feature Explanation" column.
        values: Values for the "Value" column.
        weights: Values for the "Weight" column, drawn as progress bars on a
            0-1 scale.
        title: Heading shown above the table.
    """
    table = pd.DataFrame(
        {"Feature Explanation": texts, "Value": values, "Weight": weights}
    )
    st.markdown(f"#### {title}")  # Markdown for styling

    weight_column = st.column_config.ProgressColumn(
        "Weight", width="small", format="%.2f", min_value=0, max_value=1
    )
    st.dataframe(
        table,
        hide_index=True,
        width=475,
        column_config={"Weight": weight_column},
    )  # use_container_width=True
# Change the colours of the buttons carrying a given label.
def ChangeButtonColour(widget_label, font_color, background_color="transparent"):
    """Recolour every button whose text equals ``widget_label``.

    A zero-sized HTML component is rendered whose script walks all ``button``
    elements of the parent document and sets the text colour and background of
    those whose ``innerText`` matches the label.

    Args:
        widget_label: Exact button text to match.
        font_color: CSS colour assigned to ``style.color``.
        background_color: CSS value assigned to ``style.background``.
    """
    # The arguments are interpolated directly into the script source.
    script = f"""
<script>
var elements = window.parent.document.querySelectorAll('button');
for (var i = 0; i < elements.length; ++i) {{
if (elements[i].innerText == '{widget_label}') {{
elements[i].style.color ='{font_color}';
elements[i].style.background = '{background_color}'
}}
}}
</script>
"""
    components.html(script, height=0, width=0)
|