"""Helpers for loading, cleaning and splitting a tabular dataset."""

import pandas as pd
from sklearn.model_selection import train_test_split


def load_data(file_path) -> pd.DataFrame:
    """Load data from a CSV file.

    Args:
        file_path: Location of the CSV file; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The parsed contents as a DataFrame.
    """
    return pd.read_csv(file_path)


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Clean the dataset by handling missing values and duplicates.

    Rows containing a missing value are dropped first, then duplicated
    rows are removed. The caller's DataFrame is not modified; a new one
    is returned. The index is not reset, so surviving rows keep their
    original index labels.

    Args:
        df: The DataFrame to clean.

    Returns:
        A DataFrame without missing values or duplicate rows.
    """
    df = df.dropna()
    df = df.drop_duplicates()
    return df


def preprocess_data(df: pd.DataFrame, target_column):
    """Preprocess the data by splitting into features and target.

    Args:
        df: The full dataset, including the target column.
        target_column: Label of the column to predict.

    Returns:
        A ``(X, y)`` tuple where ``X`` is ``df`` without
        ``target_column`` and ``y`` is the ``target_column`` column.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    X = df.drop(columns=[target_column])
    y = df[target_column]
    return X, y


def split_data(X, y, test_size=0.2, random_state=42):
    """Split the data into training and testing sets.

    Args:
        X: Feature data.
        y: Target data, aligned with ``X``.
        test_size: Forwarded to ``train_test_split``; defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so that the
            split is reproducible; defaults to 42.

    Returns:
        The result of ``train_test_split``, i.e.
        ``X_train, X_test, y_train, y_test``.
    """
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )