import torch
import numpy as np
from typing import Dict, Optional, Tuple

from src.model import KickstarterModel


class KickstarterExplainer:
    """Kickstarter prediction model explainer."""

    def __init__(self, model: KickstarterModel, device: Optional[torch.device] = None):
        """
        Initialize the explainer.

        Args:
            model: Trained model.
            device: Computation device; defaults to CUDA when available, otherwise CPU.
        """
        self.model = model
        self.device = device if device is not None else torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        self.model.to(self.device)
        self.model.eval()

        # Numerical features, in the column order expected by the model.
        self.numerical_feature_names = [
            'description_length',
            'funding_goal',
            'image_count',
            'video_count',
            'campaign_duration',
            'previous_projects_count',
            'previous_success_rate',
            'previous_pledged',
            'previous_funding_goal'
        ]

        # Maps display feature names to the embedding keys expected in the model inputs.
        self.embedding_map = {
            'description_embedding': 'description_embedding',
            'blurb_embedding': 'blurb_embedding',
            'risk_embedding': 'risk_embedding',
            'subcategory_embedding': 'subcategory_embedding',
            'category_embedding': 'category_embedding',
            'country_embedding': 'country_embedding'
        }

    def _compute_feature_contribution(self, baseline_probs, inputs, feature_name,
                                      is_numerical=False, index=None):
        """Compute a single feature's contribution by scoring it in isolation.

        All inputs are zeroed except the selected feature (or the selected numerical
        column), and the resulting probability is compared with the all-zeros baseline.
        """
        feature_input = {k: torch.zeros_like(v) for k, v in inputs.items()}

        if is_numerical:
            # Keep only the selected numerical column; every other column stays zero.
            feature_input['numerical_features'][:, index] = inputs['numerical_features'][:, index]
        else:
            # Keep only the selected embedding input.
            feature_input[feature_name] = inputs[feature_name]

        with torch.no_grad():
            feature_probs, _ = self.model(feature_input)

        return (feature_probs - baseline_probs).cpu().item()

    def explain_prediction(self, inputs: Dict[str, torch.Tensor]) -> Tuple[float, Dict[str, float]]:
""" |
|
Explain a single prediction. |
|
|
|
Args: |
|
inputs: Input features. |
|
|
|
Returns: |
|
Predicted probability and SHAP contribution values. |
|
""" |
|
|
|
inputs = {k: v.to(self.device) for k, v in inputs.items()} |
|
|
|
|
|
with torch.no_grad(): |
|
probs, _ = self.model(inputs) |
|
|
|
|
|
shap_values = {} |
|
baseline = {k: torch.zeros_like(v) for k, v in inputs.items()} |
|
|
|
|
|
with torch.no_grad(): |
|
baseline_probs, _ = self.model(baseline) |
|
|
|
|
|
for feature_name, embedding_name in self.embedding_map.items(): |
|
if embedding_name in inputs: |
|
shap_values[feature_name] = self._compute_feature_contribution( |
|
baseline_probs, inputs, embedding_name |
|
) |
|
|
|
|
|
if 'numerical_features' in inputs: |
|
num_features = inputs['numerical_features'].size(1) |
|
for i in range(num_features): |
|
feature_name = self.numerical_feature_names[i] |
|
shap_values[feature_name] = self._compute_feature_contribution( |
|
baseline_probs, inputs, 'numerical_features', |
|
is_numerical=True, index=i |
|
) |
|
|
|
|
|
prediction = probs.cpu().item() |
|
|
|
return prediction, shap_values |
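

# Minimal usage sketch (assumptions, not the project's actual setup): embedding inputs
# are 1 x D float tensors whose keys match `embedding_map`, 'numerical_features' is a
# 1 x 9 float tensor ordered like `numerical_feature_names`, and `KickstarterModel()`
# can be constructed without arguments; checkpoint loading is omitted.
if __name__ == "__main__":
    model = KickstarterModel()  # hypothetical no-argument construction
    explainer = KickstarterExplainer(model)

    example_inputs = {
        'description_embedding': torch.randn(1, 768),  # assumed embedding size
        'blurb_embedding': torch.randn(1, 768),
        'numerical_features': torch.randn(1, 9),
    }

    prediction, contributions = explainer.explain_prediction(example_inputs)
    print(f"Predicted success probability: {prediction:.3f}")
    for name, value in sorted(contributions.items(), key=lambda kv: -abs(kv[1])):
        print(f"{name}: {value:+.4f}")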