{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Libraries\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Load\n", "train_df = pd.read_csv(\"datasets/train.csv\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Preprocess\n", "train_df[\"Age\"] = train_df[\"Age\"].fillna(train_df[\"Age\"].median())\n", "\n", "train_df[\"Embarked\"] = train_df[\"Embarked\"].fillna(train_df[\"Embarked\"].mode()[0])\n", "\n", "train_df[\"Sex\"] = train_df[\"Sex\"].map({\"male\": 0, \"female\": 1})\n", "\n", "train_df = pd.get_dummies(train_df, columns=[\"Embarked\"], drop_first=True)\n", "\n", "train_df[\"FamilySize\"] = train_df[\"SibSp\"] + train_df[\"Parch\"] + 1\n", "\n", "train_df.drop([\"Name\", \"Ticket\", \"Cabin\", \"PassengerId\"], axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Split\n", "X = train_df.drop(\"Survived\", axis=1)\n", "y = train_df[\"Survived\"]\n", "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestClassifier(random_state=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(random_state=1)