# HyperTrack model and training loss parameters
#
# Match these with the corresponding 'models_<ID>.py' under 'hypertrack/models/'
#
# [email protected], 2023
import numpy as np
# -------------------------------------------------------------------------
# Input normalization
# (can accelerate training and mitigate float scale problems, but is not strictly necessary)
normalize_input = False
"""
- coord[0] (min,max,mean,std): -1025.3399658203125 | 1025.3399658203125 | 1.0586246252059937 | 266.20428466796875
- coord[1] (min,max,mean,std): -1025.3399658203125 | 1025.3399658203125 | -0.022702794522047043 | 267.56085205078125
- coord[2] (min,max,mean,std): -2955.5 | 2955.5 | 1.6228374242782593 | 1064.4954833984375
"""
def feature_scaler(X):
    """Standardize the first three (x,y,z) hit coordinate columns of X in-place,
    using the mean and std values listed above."""
    mu    = [1.06, -0.023, 1.62]
    sigma = [266.2, 267.6, 1064.5]
    for i in range(len(mu)):
        X[:,i] = (X[:,i] - mu[i]) / sigma[i]
    return X
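
# [Illustration only] A minimal sketch of how the scaler above is assumed to be applied by the
# data pipeline: the (N x 3) hit coordinate array is standardized only when 'normalize_input'
# is True. The helper name '_apply_input_scaling' is hypothetical.
def _apply_input_scaling(X):
    if normalize_input:
        X = feature_scaler(X)
    return X
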
# -------------------------------------------------------------------------
# ** Training only parameters **
train_param = {
# Total loss weights per each individual loss
'beta': {
'net': {
'edge_BCE' : 0.2, # 0.2
'edge_contrastive' : 1.0, # 1.0
'cluster_BCE' : 0.2, # 0.2
'cluster_contrastive': 0.2, # 1.0
'cluster_neglogpdf': 0.0, # [EXPERIMENTAL] (keep it zero)
},
'pdf': {
'track_neglogpdf': 1.0, # [EXPERIMENTAL]
}
},
# Edge loss
'edge_BCE': {
        'type': 'Focal',             # 'Focal', 'BCE', 'BCE+Hinge' (see the focal-loss sketch after this dict)
'gamma': 1.0, # For 'Focal' (entropy exponent)
'delta': 0.05, # For 'BCE+Hinge' (proportion)
'remove_self_edges': False, # Remove self-edges
'edge_balance': True # true/false edge balance unity re-weight
},
# Contrastive loss per particle
'edge_contrastive': {
        'weights': True,             # TrackML hit weights are compatible with this loss
'type': 'softmax',
'tau': 0.3, # temperature (see: https://arxiv.org/abs/2012.09740, https://openreview.net/pdf?id=vnOHGQY4FP1)
'sub_sample': 300, # memory constraint (maximum number of target objects to compute the loss per event)
        'min_prob': 1e-3,            # minimum edge prob. score to be included in the loss [EXPERIMENTAL]
                                     # (higher values push towards purity, but can weaken efficiency for e.g. high multiplicity clusters)
    },
# Cluster hit binary cross entropy loss
'cluster_BCE': {
        'weights': False,            # TrackML hit weights (0 for noise) are not fully compatible with this loss
'type': 'Focal', # 'BCE', 'BCE+Hinge', 'Focal'
'gamma': 1.0, # For 'Focal' (entropy exponent)
'delta': 0.05, # For 'BCE+Hinge' (proportion)
},
# Cluster set hit loss
'cluster_contrastive': {
        'weights': False,            # TrackML hit weights (0 for noise) are not fully compatible with this loss
'type': 'intersect', # 'intersect', 'dice', 'jaccard'
'smooth': 1.0 # regularization for 'dice' and 'jaccard'
},
# Cluster meta-supervision target
    'meta_target': 'pivotmajor'      # 'major' (majority vote over all nodes' ground truth) or 'pivotmajor' (majority vote over the pivots' ground truth)
}
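
# [Illustration only] A minimal PyTorch sketch of the focal binary cross entropy selected by
# train_param['edge_BCE'] ('type': 'Focal'), including the optional true/false edge balancing
# re-weight. The functional form here is an assumption for illustration; the actual loss code
# lives under hypertrack/ and may differ in details.
def _focal_edge_BCE_sketch(prob, target,
                           gamma=train_param['edge_BCE']['gamma'],
                           balance=train_param['edge_BCE']['edge_balance']):
    import torch
    eps    = 1e-7
    prob   = prob.clamp(eps, 1.0 - eps)
    target = target.float()

    # Per-edge focal BCE: -(1 - p_t)^gamma * log(p_t), with p_t the probability of the true class
    p_t  = torch.where(target > 0.5, prob, 1.0 - prob)
    loss = -(1.0 - p_t)**gamma * torch.log(p_t)

    if balance:
        # Re-weight so that true and false edges contribute equally in total
        w_pos = 0.5 / target.sum().clamp(min=1.0)
        w_neg = 0.5 / (1.0 - target).sum().clamp(min=1.0)
        w     = torch.where(target > 0.5, w_pos, w_neg)
        return (w * loss).sum()

    return loss.mean()
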
# -------------------------------------------------------------------------
# These algorithm parameters can be changed after training, but note that
# the Transformer network adapts (learns) its weights according to the
# values set here during training
cluster_param = {
# These are set from the command line interface
'algorithm': None,
'edge_threshold': None,
## Cut clustering & Transformer clustering input
    'min_graph': 4,        # Minimum subgraph size after the edge threshold cut and WCC search; smaller subgraphs are treated as noise
## DBSCAN clustering
'dbscan': {
'eps': 0.2,
'min_samples': 3,
},
## HDBSCAN clustering
# https://hdbscan.readthedocs.io/en/latest/api.html
'hdbscan': {
'algorithm': 'generic',
'cluster_selection_epsilon': 0.0,
'cluster_selection_method': 'eom', # 'eom' or 'leaf'
'alpha': 1.0,
'min_samples': 2, # Keep it 2
'min_cluster_size': 4,
'max_dist': 1.0 # Keep it 1.0
},
## Transformer clustering
'worker_split': 4, # GPU Memory <-> GPU latency tradeoff (no accuracy impact)
'transformer': {
        'seed_strategy': 'random',   # 'random', 'max' (max norm), 'max_T' (transverse max), 'min' (min norm), 'min_T' (transverse min)
'seed_ktop_max': 2, # Number of pivot walk (seed) candidates (higher -> better accuracy but slower)
'N_pivots': 3, # Number of pivotal hits to search per cluster (>> 1)
'random_paths': 1, # (Put >> 1 for MC sampled random walk, and 1 for greedy max-prob walk)
'max_failures': 2, # Maximum number of failures per pivot list nodes (put 1+ for greedy, >> 1 for MC walk)
'diffuse_threshold': 0.4, # Diffusion connectivity ~ Pivot quality threshold
        # Micrograph extension type: 'pivot-spanned' (compatible with 'hyper' adjacency), 'full' (needed for adjacency other than 'hyper'; more inclusive but possibly unstable)
'micrograph_mode': 'pivot-spanned',
'threshold_algo': 'fixed', # 'soft' (learnable), 'fisher' (batch-by-batch 1D-Fisher rule adaptive) or 'fixed'
        'tau': 0.001,                # 'soft':: Sigmoid 'temperature' (tau -> 0 ~ Heaviside step)
'ktop_max': 30, # 'fisher':: Maximum cluster size (how many are considered from Transformer output), ranked by mask score
        'fisher_threshold': np.linspace(0.4, 0.6, 0),  # 'fisher':: Threshold values tested (note: num=0 gives an empty array; unused when 'threshold_algo' is 'fixed')
'fixed_threshold': 0.5, # 'fixed':: Note, if this is too high -> training may be unstable (first transformer iterations are bad)
'min_cluster_size': 4, # Require at least this many constituents per cluster
}
}
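
# [Illustration only] Sketch of how the 'dbscan' parameters above are assumed to map onto a
# scikit-learn style DBSCAN call when that clustering branch is chosen from the command line
# (assuming sklearn as the backend; the actual call is made inside the HyperTrack code).
def _dbscan_sketch(Z):
    from sklearn.cluster import DBSCAN
    db = DBSCAN(eps=cluster_param['dbscan']['eps'],
                min_samples=cluster_param['dbscan']['min_samples'])
    return db.fit_predict(Z)   # label -1 denotes noise points
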
# -------------------------------------------------------------------------
### Geometric adjacency estimator
geom_param = {
# Use pre-trained 'voxdyn' or 'neurodyn' (experimental)
'algorithm': 'voxdyn',
# Print adjacency metrics (this will slow down significantly)
'verbose': False,
    #'device': 'cuda',   # CUDA not working with Faiss from conda at the moment (CUDA 11.4); see the device-switch sketch after this dict
'device': 'cpu',
# 'neurodyn' parameters (PLACEHOLDER; not implemented)
'neural_param': {
'layers': [6, 128, 64, 1],
'act': 'silu',
'bn': True,
'dropout': 0.0,
'last_act': False
},
'neural_path': 'models/neurodyn'
}
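
# [Illustration only] The 'device' flag above is assumed to switch between CPU and GPU Faiss
# indices for the geometric adjacency search (cf. the Faiss/CUDA note above). Hypothetical
# helper; the actual 'voxdyn' implementation lives under hypertrack/.
def _faiss_index_sketch(dim, device=geom_param['device']):
    import faiss
    index = faiss.IndexFlatL2(dim)             # exact L2 nearest-neighbour index
    if device == 'cuda':
        res   = faiss.StandardGpuResources()   # requires a working faiss-gpu build
        index = faiss.index_cpu_to_gpu(res, 0, index)
    return index
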
# -------------------------------------------------------------------------
### GNN + Transformer model parameters
net_model_param = {
# GNN predictor block
'graph_block_param': {
'GNN_model' : 'SuperEdgeConv', # 'SuperEdgeConv', 'GaugeEdgeConv'
'nstack': 5, # Number of GNN message passing layers
'coord_dim': 3, # Input dimension
'h_dim': 64, # Intermediate latent embedding dimension
'z_dim': 61, # Final latent embedding dimension
# https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#aggregation-operators
'SuperEdgeConv': {
'm_dim': 64,
            'aggr': ['mean']*5,      # One entry per 'nstack' layer; 'mean' (seems best memory/accuracy-wise), 'sum', 'max', 'softmax', 'multi-aggregation', 'set-transformer'
'use_residual': True,
},
'GaugeEdgeConv': {
'm_dim': 64,
'aggr': ['mean']*5, # As many as 'nstack'
'norm_coord': False,
'norm_coord_scale_init': 1e-2,
},
# Edge prediction (correlation MLP) type: 'symmetric-dot', 'symmetrized', 'asymmetric'
# (clustering Transformer should prefer 'symmetric-dot')
'edge_type': 'symmetric-dot',
## Convolution (message passing) MLPs
'MLP_GNN_edge': {
'act': 'silu', # 'relu', 'tanh', 'silu', 'elu'
'bn': True,
'dropout': 0.0,
'last_act': True,
},
#'MLP_GNN_coord': { # Only for 'GaugeEdgeConv'
# 'act': 'silu',
# 'bn': True,
# 'dropout': 0.0,
# 'last_act': True,
#},
'MLP_GNN_latent': {
'act': 'silu',
'bn': True,
'dropout': 0.0,
'last_act': True,
},
## Latent Fusion MLP
'MLP_fusion': {
'act': 'silu',
'bn': True,
'dropout': 0.0,
'last_act': True,
},
## 2-pt edge correlation MLP
'MLP_correlate': {
'act': 'silu',
'bn': True,
'dropout': 0.0,
'last_act': False,
},
},
# Transformer clusterization block
'cluster_block_param': {
        'in_dim': 64,       # Same as GNN 'z_dim' + 3 (for the 3D hit coordinates); see the consistency-check sketch after this dict
'h_dim': 64, # Latent dim, needs to be divisible by num_heads
'output_dim': 1, # Always 1
'nstack_dec': 4, # Number of self-attention layers
'MLP_enc': { # First encoder MLP
'act': 'silu',
'bn': False,
'dropout': 0.0,
'last_act': False,
},
'MAB_dec': { # Transformer decoder MAB
'num_heads': 4,
'ln': True,
'dropout': 0.0,
'MLP_param':{
'act': 'silu',
'bn': False,
'dropout': 0.0,
'last_act': True,
}
},
'SAB_dec': { # Transformer decoder SAB
'num_heads': 4,
'ln': True,
'dropout': 0.0,
'MLP_param':{
'act': 'silu',
'bn': False,
'dropout': 0.0,
'last_act': True,
}
},
'MLP_mask': { # Mask decoder MLP
'act': 'silu',
'bn': False,
'dropout': 0.0,
'last_act': False,
}
}
}
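
# [Illustration only] A small consistency check over the dimensions wired together above:
# the Transformer 'in_dim' equals the GNN latent 'z_dim' plus the 3 hit coordinates, the
# attention latent must be divisible by the number of heads, and one aggregation entry is
# needed per message passing layer. (Hypothetical helper; equivalent checks may exist elsewhere.)
def _check_net_model_dim(p=net_model_param):
    g, c = p['graph_block_param'], p['cluster_block_param']
    assert c['in_dim'] == g['z_dim'] + g['coord_dim'],      "Transformer in_dim != z_dim + coord_dim"
    assert c['h_dim'] % c['MAB_dec']['num_heads'] == 0,     "h_dim not divisible by MAB num_heads"
    assert c['h_dim'] % c['SAB_dec']['num_heads'] == 0,     "h_dim not divisible by SAB num_heads"
    assert len(g['SuperEdgeConv']['aggr']) == g['nstack'],  "One 'aggr' entry needed per GNN layer"
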
# -------------------------------------------------------------------------
# [EXPERIMENTAL] -- normalizing flow
# Conditional data array indices (see /hypertrack/trackml.py)
cond_ind = [0,1,2,3,4,5,6]
pdf_model_param = {
'in_dim': 61,
'num_cond_inputs': len(cond_ind),
'h_dim': 196,
'nblocks': 4,
'act': 'tanh'
}
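
# [Illustration only] The flow input dimension matches the GNN latent 'z_dim' (61), and
# 'cond_ind' selects the conditional feature columns from the data array built in
# /hypertrack/trackml.py. A sketch of the assumed slicing (hypothetical helper; the actual
# column layout is defined in trackml.py):
def _split_flow_inputs_sketch(Z, A):
    cond = A[:, cond_ind]    # (N x num_cond_inputs) conditioning features
    assert Z.shape[1]    == pdf_model_param['in_dim']
    assert cond.shape[1] == pdf_model_param['num_cond_inputs']
    return Z, cond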