"""Fraud-detection demo: train a Gaussian Naive Bayes model and serve a web form.

Executing this module loads the ``saifhmb/FraudPaymentData`` dataset, fits a
preprocessing + ``GaussianNB`` pipeline on an under-sampled training split,
plots a confusion matrix for the held-out split, pickles the fitted pipeline
to ``model.pkl`` and reloads it.  ``main()`` then renders the page on which a
user enters one transaction and receives the model's prediction.
"""

# importing libraries
import pickle
from pathlib import Path
from tempfile import mkdtemp, mkstemp

import imblearn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
# Previously missing: every `st.*` call in main() raised NameError without it.
import streamlit as st
from datasets import load_dataset, load_dataset_builder
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from skops import hub_utils
from skops.card import Card, metadata_from_config

# Loading the dataset
dataset_name = "saifhmb/FraudPaymentData"
dataset = load_dataset(dataset_name, split='train')
dataset = pd.DataFrame(dataset)
dataset = dataset.dropna()
# deleting high cardinality features
dataset = dataset.drop(
    ['Time_step', 'Transaction_Id', 'Sender_Id', 'Sender_Account', 'Bene_Id', 'Bene_Account'],
    axis=1,
)
# NOTE(review): the target is picked by position; presumably column 5 is
# 'Label' (the column dropped on the next line) -- confirm against the dataset.
y = dataset.iloc[:, 5].values
dataset = dataset.drop(['Label'], axis=1)
# delete column since there is only a single unique value for 'Sender_lob'
# and 'Sender_sector' is a high cardinal feature
dataset = dataset.drop(['Sender_lob', 'Sender_Sector'], axis=1)

# Encoding the Independent Variables
categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
onehot_categorical = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
categorical_transformer = Pipeline(steps=[('onehot', onehot_categorical)])

numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline(steps=[('scale', sc)])

preprocessorForCategoricalColumns = ColumnTransformer(
    transformers=[('cat', categorical_transformer, categoricalColumns)],
    remainder='passthrough',
)
preprocessorForAllColumns = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categoricalColumns),
        ('num', numerical_transformer, numericalColumns),
    ],
    remainder="passthrough",
)

# Splitting the dataset into Training and Test set
X = dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Train Naive Bayes Model using the Training set
# Handling imbalanced dataset. The sampler is seeded (same seed as the split)
# so that the pickled model is reproducible from one run to the next.
under_sampler = RandomUnderSampler(random_state=42)
X_under, y_under = under_sampler.fit_resample(X_train, y_train)
classifier = GaussianNB()  # select the appropriate algorithm for the problem statement
model = Pipeline(steps=[('preprocessorAll', preprocessorForAllColumns), ('classifier', classifier)])
model.fit(X_under, y_under)

# Predicting the Test result
y_pred = model.predict(X_test)

# Making the Confusion Matrix and evaluating performance.
# confusion_matrix expects (y_true, y_pred); the arguments used to be swapped,
# which transposed the matrix relative to the axis labels drawn by the display.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=np.array(['0 - Normal', '1 - Fraudulent']),
)
disp.plot()
plt.show()
acc = accuracy_score(y_test, y_pred)

# Pickling the model (context manager closes the handle even if dump fails)
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Loading the model to predict on the data.
# NOTE: pickle.load executes arbitrary code from the file; this is acceptable
# only because model.pkl was written by this very script a moment ago.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)


def welcome():
    """Return a fixed greeting string."""
    return 'welcome all'


def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
    """Predict the class of a single transaction entered by the user.

    The four values are placed in a one-row DataFrame whose column names are
    the feature names the pipeline was fitted on, then passed to the
    module-level ``model``.

    Args:
        Sender_Country: value for the 'Sender_Country' feature.
        Bene_Country: value for the 'Bene_Country' feature.
        USD_amount: value for the 'USD_amount' feature.
        Transaction_Type: value for the 'Transaction_Type' feature.

    Returns:
        Whatever ``model.predict`` returns for the one-row frame (the caller
        compares it against 0 and 1).  The value is also printed.
    """
    X = pd.DataFrame(
        [[Sender_Country, Bene_Country, USD_amount, Transaction_Type]],
        columns=['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type'],
    )
    predicted = model.predict(X)
    print(predicted)
    return predicted


def main():
    """Render the web page: model description, input form and prediction result."""
    # giving the webpage a title
    st.title("Fraud Detection ML App")
    st.header("Model Description", divider = "gray")
    multi = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset, 
containining a large variety of transaction types representing normal activities as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent. For more details on the model please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model '''
    st.markdown(multi)
    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type :")

    # Two rows of two side-by-side inputs.
    col1, col2 = st.columns(2)
    with col1:
        Sender_Country = st.text_input("Sender Country")
    with col2:
        Bene_Country = st.text_input("Beneficiary Country")

    col3, col4 = st.columns(2)
    with col3:
        USD_amount = st.number_input("Amount in USD")
    with col4:
        Transaction_Type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")

    # Only query the model once the user presses the button.
    if st.button("Predict"):
        result = prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type)
        if result == 0:
            st.success("The output is {}".format(result) + " This is a NORMAL transaction")
        elif result == 1:
            st.success("The output is {}".format(result) + " This is a FRAUDULENT TRANSACTION")


if __name__=='__main__':
    main()