from pathlib import Path

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

						|  | train_data = pd.read_csv("./input/train.csv") | 
					
						
						|  | test_data = pd.read_csv("./input/test.csv") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | X = train_data.drop(["Exited", "id", "CustomerId", "Surname"], axis=1) | 
					
						
						|  | y = train_data["Exited"] | 
					
						
						|  | X_test = test_data.drop(["id", "CustomerId", "Surname"], axis=1) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | numerical_transformer = StandardScaler() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | categorical_transformer = OneHotEncoder(handle_unknown="ignore") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | preprocessor = ColumnTransformer( | 
					
						
						|  | transformers=[ | 
					
						
						|  | ("num", numerical_transformer, X.select_dtypes(exclude=["object"]).columns), | 
					
						
						|  | ("cat", categorical_transformer, X.select_dtypes(include=["object"]).columns), | 
					
						
						|  | ] | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model = GradientBoostingClassifier() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | clf = Pipeline(steps=[("preprocessor", preprocessor), ("model", model)]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | X_train, X_valid, y_train, y_valid = train_test_split( | 
					
						
						|  | X, y, test_size=0.2, random_state=0 | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | clf.fit(X_train, y_train) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | preds = clf.predict_proba(X_valid)[:, 1] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | score = roc_auc_score(y_valid, preds) | 
					
						
						|  | print(f"ROC AUC score: {score}") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | preds_test = clf.predict_proba(X_test)[:, 1] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | output = pd.DataFrame({"id": test_data.id, "Exited": preds_test}) | 
					
						
						|  | output.to_csv("./working/submission.csv", index=False) | 
					
						
						|  |  |