|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.svm import SVC |
|
|
from sklearn.metrics import classification_report |
|
|
from sklearn.pipeline import Pipeline |
|
|
from sklearn.compose import ColumnTransformer |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
import joblib |
|
|
|
|
|
|
|
|
# Imported ahead of its first use: the original called load_dataset() several
# lines before this import executed, which raises NameError at runtime.
from datasets import load_dataset

# load_dataset() returns a Hugging Face dataset object, not a CSV path, so it
# cannot be handed to pd.read_csv(); convert the split to a DataFrame instead.
# NOTE(review): assumes the dataset exposes a "train" split -- confirm on the
# dataset's Hub page.
data = load_dataset("nikesh66/Sarcasm-dataset", split="train").to_pandas()

# Replace missing values in the numeric feature column with 0.
# NOTE(review): assumes a 'user_feature' column exists in the dataset -- verify.
data['user_feature'] = data['user_feature'].fillna(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Per-column feature extractors: a standard scaler for the numeric feature and
# a TF-IDF vectorizer (English stop words removed, at most 5000 terms) for text.
scaler = StandardScaler()
text_vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)

# Route each input column to its extractor. The text column is named with a
# bare string while the numeric column is wrapped in a list.
preprocessor = ColumnTransformer(
    [
        ("text", text_vectorizer, "text"),
        ("user_features", scaler, ["user_feature"]),
    ]
)
|
|
|
|
|
|
|
|
|
|
|
|