import joblib
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load Dataset
# load_dataset() returns a mapping of split name -> Dataset rather than a CSV
# path, so it cannot be handed to pd.read_csv(); pick one split and convert it
# to a DataFrame instead.
dataset_splits = load_dataset("nikesh66/Sarcasm-dataset")
# Prefer the conventional "train" split; fall back to whichever split exists.
split_name = "train" if "train" in dataset_splits else next(iter(dataset_splits))
data = dataset_splits[split_name].to_pandas()

# NOTE(review): assumes the dataset exposes 'text' and 'user_feature' columns
# -- confirm against the actual dataset schema.
# Missing user features are replaced with 0 so StandardScaler never sees NaN.
data['user_feature'] = data['user_feature'].fillna(0)

# Preprocessing
# TF-IDF over the raw text, capped at the 5000 highest-frequency terms with
# English stop words removed.
text_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
scaler = StandardScaler()

# The text column is selected by a bare string so the vectorizer receives a
# 1-D sequence of documents; the numeric column is selected by a list so the
# scaler receives a 2-D array.
preprocessor = ColumnTransformer(
    transformers=[
        ('text', text_vectorizer, 'text'),
        ('user_features', scaler, ['user_feature']),
    ]
)

# Model