# HyperTrack model and training loss parameters | |
# | |
# Keep in sync with the corresponding 'models_<ID>.py' under 'hypertrack/models/'
# | |
# [email protected], 2023 | |
import numpy as np | |
# ------------------------------------------------------------------------- | |
# Input normalization | |
# (can accelerate training and mitigate float scale problems, but is not strictly required)
normalize_input = False | |
""" | |
- coord[0] (min,max,mean,std): -1025.3399658203125 | 1025.3399658203125 | 1.0586246252059937 | 266.20428466796875 | |
- coord[1] (min,max,mean,std): -1025.3399658203125 | 1025.3399658203125 | -0.022702794522047043 | 267.56085205078125 | |
- coord[2] (min,max,mean,std): -2955.5 | 2955.5 | 1.6228374242782593 | 1064.4954833984375 | |
""" | |
def feature_scaler(X):
    """ Standardize each input coordinate in-place: X[:,i] -> (X[:,i] - mu[i]) / sigma[i] """
    mu    = [1.06, -0.023, 1.62]
    sigma = [266.2, 267.6, 1064.5]
    for i in range(len(mu)):
        X[:,i] = (X[:,i] - mu[i]) / sigma[i]
    return X
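
# A minimal usage sketch (illustrative only; not called anywhere in this file):
# the training pipeline is assumed to apply the scaler when 'normalize_input'
# is True. 'X' here stands for a mock (N, 3) hit coordinate array.
def _demo_feature_scaler():
    X = 266.0 * np.random.randn(8, 3).astype(np.float32) # mock (x,y,z) hits
    if normalize_input:
        X = feature_scaler(X) # standardizes all three columns in-place
    return X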
# ------------------------------------------------------------------------- | |
# ** Training only parameters ** | |
train_param = { | |
# Total loss weights per each individual loss | |
'beta': { | |
'net': { | |
'edge_BCE' : 0.2, # 0.2 | |
'edge_contrastive' : 1.0, # 1.0 | |
'cluster_BCE' : 0.2, # 0.2 | |
'cluster_contrastive': 0.2, # 1.0 | |
'cluster_neglogpdf': 0.0, # [EXPERIMENTAL] (keep it zero) | |
}, | |
'pdf': { | |
'track_neglogpdf': 1.0, # [EXPERIMENTAL] | |
} | |
}, | |
# Edge loss | |
'edge_BCE': { | |
'type': 'Focal', # 'Focal', 'BCE', 'BCE+Hinge' | |
'gamma': 1.0, # For 'Focal' (entropy exponent) | |
'delta': 0.05, # For 'BCE+Hinge' (proportion) | |
        'remove_self_edges': False, # Remove self-edges (i -> i loops) from the loss
        'edge_balance': True        # Re-weight true and false edges to unit (1:1) balance
}, | |
# Contrastive loss per particle | |
'edge_contrastive': { | |
        'weights': True,   # TrackML hit weights are compatible with this loss
'type': 'softmax', | |
'tau': 0.3, # temperature (see: https://arxiv.org/abs/2012.09740, https://openreview.net/pdf?id=vnOHGQY4FP1) | |
'sub_sample': 300, # memory constraint (maximum number of target objects to compute the loss per event) | |
        'min_prob': 1e-3,  # Minimum edge prob. score to be included in the loss [EXPERIMENTAL]
                           # (higher values push towards purity, but can weaken efficiency, e.g. for high multiplicity clusters)
    },
# Cluster hit binary cross entropy loss | |
'cluster_BCE': { | |
        'weights': False,  # TrackML hit weights (0 for noise) are not exactly compatible
'type': 'Focal', # 'BCE', 'BCE+Hinge', 'Focal' | |
'gamma': 1.0, # For 'Focal' (entropy exponent) | |
'delta': 0.05, # For 'BCE+Hinge' (proportion) | |
}, | |
# Cluster set hit loss | |
'cluster_contrastive': { | |
        'weights': False,  # TrackML hit weights (0 for noise) are not exactly compatible
'type': 'intersect', # 'intersect', 'dice', 'jaccard' | |
'smooth': 1.0 # regularization for 'dice' and 'jaccard' | |
}, | |
# Cluster meta-supervision target | |
    'meta_target': 'pivotmajor' # 'major' (majority vote from all nodes' ground truth) or 'pivotmajor' (majority vote from the pivots' ground truth)
} | |
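
# Two hedged sketches tied to the parameters above (illustrative only; the
# actual implementations live in the training code). The 'Focal' type with
# 'gamma' presumably follows the standard focal loss (Lin et al.,
# arXiv:1708.02002); 'losses' below is a hypothetical dict of per-loss scalars.
def _focal_bce_sketch(p, y, gamma=train_param['edge_BCE']['gamma'], eps=1e-12):
    # p: predicted probabilities in [0,1], y: binary targets in {0,1}
    pt = np.where(y == 1, p, 1.0 - p) # probability assigned to the true class
    return np.mean(-(1.0 - pt)**gamma * np.log(pt + eps))

def _total_loss_sketch(losses, beta=train_param['beta']['net']):
    # total = sum_k beta_k * L_k, skipping zero-weighted terms
    return sum(w * losses[k] for k, w in beta.items() if w != 0)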
# ------------------------------------------------------------------------- | |
# These algorithm parameters can be changed after training, but note that
# the transformer network may have adapted (learned) its weights according
# to the values set here during training
cluster_param = { | |
# These are set from the command line interface | |
'algorithm': None, | |
'edge_threshold': None, | |
## Cut clustering & Transformer clustering input | |
    'min_graph': 4,  # Minimum subgraph size after the threshold cut and WCC search; smaller subgraphs are treated as noise
## DBSCAN clustering | |
'dbscan': { | |
'eps': 0.2, | |
'min_samples': 3, | |
}, | |
## HDBSCAN clustering | |
# https://hdbscan.readthedocs.io/en/latest/api.html | |
'hdbscan': { | |
'algorithm': 'generic', | |
'cluster_selection_epsilon': 0.0, | |
'cluster_selection_method': 'eom', # 'eom' or 'leaf' | |
'alpha': 1.0, | |
'min_samples': 2, # Keep it 2 | |
'min_cluster_size': 4, | |
'max_dist': 1.0 # Keep it 1.0 | |
}, | |
## Transformer clustering | |
'worker_split': 4, # GPU Memory <-> GPU latency tradeoff (no accuracy impact) | |
'transformer': { | |
        'seed_strategy': 'random',  # 'random', 'max' (max norm), 'max_T' (transverse max), 'min' (min norm), 'min_T' (transverse min)
'seed_ktop_max': 2, # Number of pivot walk (seed) candidates (higher -> better accuracy but slower) | |
'N_pivots': 3, # Number of pivotal hits to search per cluster (>> 1) | |
'random_paths': 1, # (Put >> 1 for MC sampled random walk, and 1 for greedy max-prob walk) | |
        'max_failures': 2,          # Maximum number of failures per pivot list node (put 1+ for greedy, >> 1 for MC walk)
'diffuse_threshold': 0.4, # Diffusion connectivity ~ Pivot quality threshold | |
        # Micrograph extension type: 'pivot-spanned' (ok with 'hyper' adjacency), 'full' (needed for adjacency types other than 'hyper'; more inclusive but possibly unstable)
'micrograph_mode': 'pivot-spanned', | |
'threshold_algo': 'fixed', # 'soft' (learnable), 'fisher' (batch-by-batch 1D-Fisher rule adaptive) or 'fixed' | |
        'tau': 0.001,               # 'soft':: Sigmoid 'temperature' (tau -> 0 ~ Heaviside step)
'ktop_max': 30, # 'fisher':: Maximum cluster size (how many are considered from Transformer output), ranked by mask score | |
        'fisher_threshold': np.linspace(0.4, 0.6, 21), # 'fisher':: Threshold values scanned (num must be > 0; num = 0 yields an empty array)
        'fixed_threshold': 0.5,     # 'fixed':: Note: if this is too high, training may be unstable (the first transformer iterations are poor)
'min_cluster_size': 4, # Require at least this many constituents per cluster | |
} | |
} | |
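
# A hedged sketch of how the 'dbscan' parameters above could map onto
# scikit-learn's DBSCAN over the learned embeddings (the actual clustering
# call lives in the pipeline). 'Z' is a hypothetical (N, D) embedding array;
# a returned label of -1 denotes noise.
def _dbscan_sketch(Z):
    from sklearn.cluster import DBSCAN
    p = cluster_param['dbscan']
    return DBSCAN(eps=p['eps'], min_samples=p['min_samples']).fit_predict(Z)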
# ------------------------------------------------------------------------- | |
### Geometric adjacency estimator | |
geom_param = { | |
# Use pre-trained 'voxdyn' or 'neurodyn' (experimental) | |
'algorithm': 'voxdyn', | |
    # Print adjacency metrics (this slows down execution significantly)
'verbose': False, | |
#'device': 'cuda', # CUDA not working with Faiss from conda atm (CUDA 11.4) | |
'device': 'cpu', | |
# 'neurodyn' parameters (PLACEHOLDER; not implemented) | |
'neural_param': { | |
'layers': [6, 128, 64, 1], | |
'act': 'silu', | |
'bn': True, | |
'dropout': 0.0, | |
'last_act': False | |
}, | |
'neural_path': 'models/neurodyn' | |
} | |
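
# A hedged sketch of a Faiss k-nearest-neighbour lookup of the kind the
# 'voxdyn' CPU path presumably builds on (illustrative only). 'X' is a
# hypothetical (N, 3) contiguous float32 hit coordinate array.
def _faiss_knn_sketch(X, k=8):
    import faiss
    index = faiss.IndexFlatL2(X.shape[1]) # exact L2 index on CPU
    index.add(X)
    D, I = index.search(X, k)             # squared distances and neighbour indices
    return D, I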
# ------------------------------------------------------------------------- | |
### GNN + Transformer model parameters | |
net_model_param = { | |
# GNN predictor block | |
'graph_block_param': { | |
'GNN_model' : 'SuperEdgeConv', # 'SuperEdgeConv', 'GaugeEdgeConv' | |
'nstack': 5, # Number of GNN message passing layers | |
'coord_dim': 3, # Input dimension | |
'h_dim': 64, # Intermediate latent embedding dimension | |
'z_dim': 61, # Final latent embedding dimension | |
# https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#aggregation-operators | |
'SuperEdgeConv': { | |
'm_dim': 64, | |
'aggr': ['mean']*5, # 'mean' (seems best memory/accuracy wise), 'sum', 'max', 'softmax', 'multi-aggregation', 'set-transformer' | |
'use_residual': True, | |
}, | |
'GaugeEdgeConv': { | |
'm_dim': 64, | |
'aggr': ['mean']*5, # As many as 'nstack' | |
'norm_coord': False, | |
'norm_coord_scale_init': 1e-2, | |
}, | |
# Edge prediction (correlation MLP) type: 'symmetric-dot', 'symmetrized', 'asymmetric' | |
# (clustering Transformer should prefer 'symmetric-dot') | |
'edge_type': 'symmetric-dot', | |
## Convolution (message passing) MLPs | |
'MLP_GNN_edge': { | |
'act': 'silu', # 'relu', 'tanh', 'silu', 'elu' | |
'bn': True, | |
'dropout': 0.0, | |
'last_act': True, | |
}, | |
#'MLP_GNN_coord': { # Only for 'GaugeEdgeConv' | |
# 'act': 'silu', | |
# 'bn': True, | |
# 'dropout': 0.0, | |
# 'last_act': True, | |
#}, | |
'MLP_GNN_latent': { | |
'act': 'silu', | |
'bn': True, | |
'dropout': 0.0, | |
'last_act': True, | |
}, | |
## Latent Fusion MLP | |
'MLP_fusion': { | |
'act': 'silu', | |
'bn': True, | |
'dropout': 0.0, | |
'last_act': True, | |
}, | |
## 2-pt edge correlation MLP | |
'MLP_correlate': { | |
'act': 'silu', | |
'bn': True, | |
'dropout': 0.0, | |
'last_act': False, | |
}, | |
}, | |
# Transformer clusterization block | |
'cluster_block_param': { | |
        'in_dim': 64,       # GNN 'z_dim' (61) + 3 (for the 3D coordinates)
'h_dim': 64, # Latent dim, needs to be divisible by num_heads | |
'output_dim': 1, # Always 1 | |
'nstack_dec': 4, # Number of self-attention layers | |
'MLP_enc': { # First encoder MLP | |
'act': 'silu', | |
'bn': False, | |
'dropout': 0.0, | |
'last_act': False, | |
}, | |
'MAB_dec': { # Transformer decoder MAB | |
'num_heads': 4, | |
'ln': True, | |
'dropout': 0.0, | |
'MLP_param':{ | |
'act': 'silu', | |
'bn': False, | |
'dropout': 0.0, | |
'last_act': True, | |
} | |
}, | |
'SAB_dec': { # Transformer decoder SAB | |
'num_heads': 4, | |
'ln': True, | |
'dropout': 0.0, | |
'MLP_param':{ | |
'act': 'silu', | |
'bn': False, | |
'dropout': 0.0, | |
'last_act': True, | |
} | |
}, | |
'MLP_mask': { # Mask decoder MLP | |
'act': 'silu', | |
'bn': False, | |
'dropout': 0.0, | |
'last_act': False, | |
} | |
} | |
} | |
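
# A small consistency check over the dimensional constraints stated in the
# comments above (illustrative; run manually after editing the dimensions):
def _check_net_model_param(p=net_model_param):
    gb, cb = p['graph_block_param'], p['cluster_block_param']
    assert cb['in_dim'] == gb['z_dim'] + 3, "in_dim must equal z_dim + 3 (3D coordinates)"
    assert cb['h_dim'] % cb['MAB_dec']['num_heads'] == 0, "h_dim must be divisible by MAB num_heads"
    assert cb['h_dim'] % cb['SAB_dec']['num_heads'] == 0, "h_dim must be divisible by SAB num_heads"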
# ------------------------------------------------------------------------- | |
# [EXPERIMENTAL] -- normalizing flow | |
# Conditional data array indices (see /hypertrack/trackml.py) | |
cond_ind = [0,1,2,3,4,5,6] | |
pdf_model_param = { | |
    'in_dim': 61,  # Matches the GNN 'z_dim' latent dimension
'num_cond_inputs': len(cond_ind), | |
'h_dim': 196, | |
'nblocks': 4, | |
'act': 'tanh' | |
} | |
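
# A hedged sketch of how 'cond_ind' is assumed to slice out the conditional
# inputs for the flow (illustrative only). 'data' is a hypothetical (N, M)
# per-track array following the column layout in hypertrack/trackml.py.
def _cond_input_sketch(data):
    return data[:, cond_ind] # (N, len(cond_ind)) conditional features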