import os
import shutil

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# XGBClassifier is provided by the xgboost package; sklearn.tree does not
# export it, so importing it from there raises ImportError.
from xgboost import XGBClassifier

# Scratch directory that receives the exported FHE client/server files.
# Every run starts from an empty directory: a leftover one from an earlier
# run is deleted first, then the directory is created afresh.
fhe_directory = '/tmp/fhe_client_server_files/'

if os.path.exists(fhe_directory):
    shutil.rmtree(fhe_directory)
os.makedirs(fhe_directory)

# Load the heart-disease table into a DataFrame.
# NOTE(review): the path ends in .xls but is parsed with read_csv, so the file
# is presumably CSV text under a spreadsheet extension -- confirm.
data = pd.read_csv('data/heart.xls')

# Print a summary of the frame (columns, dtypes, non-null counts).
data.info()

# Pairwise correlation matrix of the columns; reused further down to rank
# the columns against 'output'.
data_corr = data.corr()

# Annotated heatmap of the correlation matrix on a 20x20 figure.
plt.figure(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True)

# Scratch notes kept as an inert string literal; nothing in it is executed.
"""
# Get the Data
X_train, y_train, X_val, y_val = train_test_split()
classifier = XGBClassifier()
# Training the Model
classifier = classifier.fit(X_train, y_train)
# Trained Model Evaluation on Validation Dataset
confidence = classifier.score(X_val, y_val)
# Validation Data Prediction
y_pred = classifier.predict(X_val)
# Model Validation Accuracy
accuracy = accuracy_score(y_val, y_pred)
# Model Confusion Matrix
conf_mat = confusion_matrix(y_val, y_pred)
# Model Classification Report
clf_report = classification_report(y_val, y_pred)
# Model Cross Validation Score
score = cross_val_score(classifier, X_val, y_val, cv=3)

try:
    # Load Trained Model
    clf = load(str(self.model_save_path + saved_model_name + ".joblib"))
except Exception as e:
    print("Model not found...")

if test_data is not None:
    result = clf.predict(test_data)
    print(result)
else:
    result = clf.predict(self.test_features)
    accuracy = accuracy_score(self.test_labels, result)
    clf_report = classification_report(self.test_labels, result)
    print(accuracy, clf_report)
"""

# Rank every column by the strength (absolute value) of its correlation with
# the 'output' column, strongest first.
feature_value = np.abs(np.array(data_corr['output']))
print(feature_value)

# NOTE(review): 'correalation' is a misspelling of 'correlation'; the label is
# kept unchanged because features_corr is a module-level name that other code
# may index by this column.
features_corr = pd.DataFrame(
    feature_value,
    index=data_corr['output'].index,
    columns=['correalation'],
)
feature_sorted = features_corr.sort_values(by=['correalation'], ascending=False)

# Column labels in ranked order; 'output' correlates perfectly with itself,
# so it is expected to come first.
feature_selected = feature_sorted.index

# Same rows as `data`, with the columns reordered by the ranking above.
clean_data = data[feature_selected]

# Features are every column except the label.  Dropping 'output' by name
# (rather than slicing off the first column) keeps the label out of X even if
# the correlation ranking does not happen to place 'output' first, e.g. when
# another column ties with it.
X = clean_data.drop(columns=['output'])
Y = clean_data['output']

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so the test rows do not influence the preprocessing.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Gradient-boosted tree classifier with trees limited to depth 6.
# No `criterion` argument is passed: that is a scikit-learn decision-tree
# option, not an XGBoost parameter.
dt = XGBClassifier(max_depth=6)
dt.fit(x_train, y_train)

# Predicted labels for the held-out split.
y_pred = dt.predict(x_test)

from sklearn.metrics import confusion_matrix

conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)

# Accuracy on the held-out split, reported as a percentage with two decimals.
accuracy = dt.score(x_test, y_test)
print(
    "\nThe accuracy of XGBClassifier on Heart disease prediction dataset is "
    f"{round(accuracy * 100, 2)}%"
)

# Persist the fitted model.
# NOTE(review): the file name still says 'dt' although the model is an
# XGBClassifier; it is kept unchanged because other code may load the file by
# this exact name.
joblib.dump(dt, 'heart_disease_dt_model.pkl')

from concrete.ml.sklearn import (
    DecisionTreeClassifier as ConcreteDecisionTreeClassifier,
    XGBClassifier as ConcreteXGBClassifier,
)

# Build the Concrete ML counterpart of the fitted model `dt` from the scaled
# training data with n_bits=10, then compile it on that same data.
fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
fhe_compatible.compile(x_train)

from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer

# Save the compiled model's deployment files into fhe_directory.
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()

# Load the server side back from that same directory.
server = FHEModelServer(path_dir=fhe_directory)
server.load()