Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /datasets /lime_encoder.py

erasmopurif

First commit

d2a8669 almost 2 years ago

raw

history blame

6.87 kB

	import sklearn.preprocessing
	import numpy as np

	from aif360.algorithms import Transformer


	class LimeEncoder(Transformer):
	    """Transformer for converting aif360 dataset to LIME dataset and vice versa.

	    (LIME - Local Interpretable Model-Agnostic Explanations) [2]_

	    The aif360 feature matrix one-hot-encodes categorical features into
	    columns named ``<feature>=<value>``; the LIME matrix built here holds one
	    column per original feature, with each categorical value replaced by its
	    position in the fitted list of values for that feature.

	    See for details/usage:
	    https://github.com/marcotcr/lime

	    References:
	        .. [2] M.T. Ribeiro, S. Singh, and C. Guestrin, '"Why should I trust
	           you?" Explaining the predictions of any classifier.'
	           https://arxiv.org/pdf/1602.04938v1.pdf
	    """

	    def __init__(self):
	        super(LimeEncoder, self).__init__()

	    def fit(self, dataset):
	        """Take an aif360 dataset and save all relevant metadata as well as
	        mappings needed to transform/inverse_transform the data between aif360
	        and lime.

	        Attributes set on ``self``:
	            s_feature_names_with_one_hot_encoding: ``dataset.feature_names``.
	            s_feature_names: column names of the de-dummy-coded frame,
	                label column removed.
	            s_data: values of that frame as an array.
	            s_protected_attribute_names: ``dataset.protected_attribute_names``.
	            s_labels: the first label column of the de-dummy-coded frame.
	            s_class_names: distinct label values found by ``LabelEncoder``.
	            s_categorical_features: indexes into ``s_feature_names`` of the
	                categorical features followed by the protected attributes.
	            s_categorical_names: dict mapping each of those indexes to the
	                array of distinct values seen in that column.

	        Args:
	            dataset (BinaryLabelDataset): aif360 dataset

	        Returns:
	            LimeEncoder: Returns self.
	        """
	        self.s_feature_names_with_one_hot_encoding = dataset.feature_names
	        df, _ = dataset.convert_to_dataframe(de_dummy_code=True)

	        label_name = dataset.label_names[0]
	        dfc = df.drop(label_name, axis=1)  # remove label (class) column

	        self.s_feature_names = list(dfc.columns)  # list of feature names
	        self.s_data = dfc.values  # array of feature values

	        # Categorical features are one-hot-encoded under changed names, so
	        # any de-dummied name missing from the one-hot names is categorical.
	        # Filtering the column list (rather than taking a set difference)
	        # keeps the result in column order, independent of string hashing.
	        one_hot_names = set(self.s_feature_names_with_one_hot_encoding)
	        categorical = [name for name in self.s_feature_names
	                       if name not in one_hot_names]

	        self.s_protected_attribute_names = dataset.protected_attribute_names

	        # protected attributes are treated as categorical features too
	        categorical = categorical + list(self.s_protected_attribute_names)

	        self.s_labels = df[label_name]  # label column

	        # The encoder is only used to collect the distinct label values;
	        # the labels themselves are stored untransformed.
	        s_le = sklearn.preprocessing.LabelEncoder()
	        s_le.fit(self.s_labels)
	        self.s_class_names = s_le.classes_

	        # Store the categorical features as indexes into s_feature_names
	        # (NOTE - this list DOES include the protected attributes).
	        self.s_categorical_features = [self.s_feature_names.index(name)
	                                       for name in categorical]

	        # Record, per categorical column, the distinct values it takes; the
	        # position of a value in that array is its LIME code.
	        self.s_categorical_names = {}
	        for feature in self.s_categorical_features:
	            # self.le is kept as an attribute for backward compatibility; it
	            # ends up holding the encoder of the last categorical column.
	            self.le = sklearn.preprocessing.LabelEncoder()
	            self.le.fit(self.s_data[:, feature])
	            self.s_categorical_names[feature] = self.le.classes_

	        return self

	    def _is_one_hot_encoded(self, ind, feature):
	        """Return True if feature ``ind`` is categorical and not protected.

	        Only those features are spread over ``<feature>=<value>`` columns;
	        every other feature is copied between the two layouts unchanged.
	        """
	        return (ind in self.s_categorical_features
	                and feature not in self.s_protected_attribute_names)

	    def transform(self, aif360data):
	        """Take aif360 data array and return data array that is lime encoded
	        (numeric array in which categorical features are NOT one-hot-encoded).

	        Args:
	            aif360data (np.ndarray): Dataset features

	        Returns:
	            np.ndarray: LIME dataset features

	        Raises:
	            ValueError: If a needed column name is absent from the fitted
	                one-hot feature names.
	        """
	        one_hot_names = self.s_feature_names_with_one_hot_encoding
	        limedata = np.zeros(shape=(aif360data.shape[0],
	                                   len(self.s_feature_names)))
	        if limedata.shape[0] == 0:
	            # nothing to convert; skip all column lookups
	            return limedata

	        for ind, feature in enumerate(self.s_feature_names):
	            if self._is_one_hot_encoded(ind, feature):
	                # Rows whose "<feature>=<value>" column is 1.0 get the
	                # position of <value> as their code. Values are visited in
	                # order, so a later matching value overrides an earlier one;
	                # rows with no matching column keep the initial 0.
	                for indc, cval in enumerate(self.s_categorical_names[ind]):
	                    column = one_hot_names.index(feature + "=" + cval)
	                    limedata[aif360data[:, column] == 1.0, ind] = indc
	            else:
	                # numeric or protected attribute: copy the column as is
	                column = one_hot_names.index(feature)
	                limedata[:, ind] = aif360data[:, column]

	        return limedata

	    def inverse_transform(self, limedata):
	        """Take data array that is lime encoded (that is, lime-compatible data
	        created by this class from a given aif360 dataset) and return data array
	        consistent with the original aif360 dataset.

	        Args:
	            limedata (np.ndarray): Dataset features

	        Returns:
	            np.ndarray: aif360 dataset features

	        Raises:
	            ValueError: If a needed column name is absent from the fitted
	                one-hot feature names.
	            IndexError: If a categorical code is outside the range of values
	                recorded for that feature.
	        """
	        one_hot_names = self.s_feature_names_with_one_hot_encoding
	        aif360data = np.zeros(shape=(limedata.shape[0], len(one_hot_names)))
	        if aif360data.shape[0] == 0:
	            # nothing to convert; skip all column lookups
	            return aif360data

	        for ind, feature in enumerate(self.s_feature_names):
	            if self._is_one_hot_encoded(ind, feature):
	                # s_categorical_names[ind] maps a code (array position) back
	                # to the value string used in the one-hot column name.
	                values = self.s_categorical_names[ind]
	                codes = limedata[:, ind].astype(int)
	                for code in np.unique(codes):
	                    column = one_hot_names.index(feature + "=" + values[code])
	                    aif360data[codes == code, column] = 1.0
	            else:
	                # numeric or protected attribute: copy the column as is
	                column = one_hot_names.index(feature)
	                aif360data[:, column] = limedata[:, ind]

	        return aif360data