{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Libraries\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Load\n", "# Keep the file contents untouched in raw_df; later cells derive new frames from it.\n", "raw_df = pd.read_csv(\"datasets/train.csv\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Preprocess\n", "# Work on a copy of raw_df so this cell never mutates the loaded data and\n", "# gives the same train_df every time it is re-run.\n", "train_df = raw_df.copy()\n", "\n", "# Fill missing Age values with the column median.\n", "train_df[\"Age\"] = train_df[\"Age\"].fillna(train_df[\"Age\"].median())\n", "\n", "# Fill missing Embarked values with the most frequent value.\n", "train_df[\"Embarked\"] = train_df[\"Embarked\"].fillna(train_df[\"Embarked\"].mode()[0])\n", "\n", "# Encode Sex numerically: male -> 0, female -> 1.\n", "train_df[\"Sex\"] = train_df[\"Sex\"].map({\"male\": 0, \"female\": 1})\n", "\n", "# One-hot encode Embarked, dropping the first level.\n", "train_df = pd.get_dummies(train_df, columns=[\"Embarked\"], drop_first=True)\n", "\n", "# Derived feature: SibSp + Parch + 1.\n", "train_df[\"FamilySize\"] = train_df[\"SibSp\"] + train_df[\"Parch\"] + 1\n", "\n", "# Remove the columns that are not used as model features.\n", "train_df = train_df.drop(columns=[\"Name\", \"Ticket\", \"Cabin\", \"PassengerId\"])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Split\n", "# Survived is the target; every remaining column is a feature.\n", "X = train_df.drop(\"Survived\", axis=1)\n", "y = train_df[\"Survived\"]\n", "# Hold out 20% of the rows for validation; random_state fixes the shuffle.\n", "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(random_state=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestClassifier(random_state=1)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train\n", "# Random forest with 100 trees and a fixed random_state, fitted on the training split.\n", "model = RandomForestClassifier(random_state=1, n_estimators=100)\n", "model.fit(X=X_train, y=y_train)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.7821229050279329\n", "Precision: 0.7931034482758621\n", "Recall: 0.6301369863013698\n", "F1 Score: 0.7022900763358778\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " 0 0.78 0.89 0.83 106\n", " 1 0.79 0.63 0.70 73\n", "\n", " accuracy 0.78 179\n", " macro avg 0.78 0.76 0.77 179\n", "weighted avg 0.78 0.78 0.78 179\n", "\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Evaluation\n", "# Predict once on the validation split and reuse the predictions for every metric.\n", "y_pred = model.predict(X_val)\n", "print(\"Accuracy:\", accuracy_score(y_val, y_pred))\n", "print(\"Precision:\", precision_score(y_val, y_pred))\n", "print(\"Recall:\", recall_score(y_val, y_pred))\n", "print(\"F1 Score:\", f1_score(y_val, y_pred))\n", "print(\"Classification Report:\\n\", classification_report(y_val, y_pred))\n", "\n", "# confusion_matrix puts true labels on rows and predicted labels on columns,\n", "# so the heatmap's y-axis is the actual class and its x-axis the predicted class.\n", "ax = sns.heatmap(confusion_matrix(y_val, y_pred), annot=True, fmt=\"d\", cmap=\"Blues\")\n", "ax.set(xlabel=\"Predicted\", ylabel=\"Actual\", title=\"Confusion matrix\")\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 2 }