Spaces:
Sleeping
Sleeping
# feature_extraction/pipeline.py
import numpy as np | |
import joblib | |
from feature_extraction.embedding_from_text import get_bert_embedding | |
from feature_extraction.liwc_from_text import load_liwc_dic, liwc_vector | |
# Shared resources: both objects are created a single time, when this module
# is imported, and are then reused by every call to text_to_features().

# LIWC lexicon, read from the dictionary file shipped under models/.
liwc_map = load_liwc_dic("models/output.dic")

# Previously saved scaler used to standardize the combined feature vector.
# NOTE(review): joblib.load relies on pickle, so this file must come from a
# trusted source.
scaler = joblib.load("models/scaler.pkl")
def text_to_features(text: str) -> np.ndarray:
    """Convert raw text into the standardized feature row used for prediction.

    The BERT embedding of ``text`` and its LIWC vector are joined end to end
    (embedding first, LIWC second) and the result is passed through the
    module-level ``scaler``.

    Args:
        text: The input text to featurize.

    Returns:
        Whatever ``scaler.transform`` yields for a one-row batch; per the
        original notes this has shape ``(1, total_dim)``.
        NOTE(review): presumably ``scaler`` is a scikit-learn style scaler
        returning an ``np.ndarray`` -- confirm against the saved model.
    """
    # Sentence embedding (768-dim according to the original author's notes).
    bert_part = get_bert_embedding(text)

    # liwc_vector returns a pair; only its first element, the LIWC feature
    # vector (~64-dim according to the original notes), is needed here.
    liwc_part, _ignored = liwc_vector(text, liwc_map)

    # Embedding first, LIWC second; this order is kept exactly as written.
    # NOTE(review): the scaler was presumably fitted on this same layout.
    combined = np.concatenate((bert_part, liwc_part))

    # Wrap the single vector in a list so the scaler sees a batch of one row.
    return scaler.transform([combined])