import os

import torch
import torch.nn as nn
from transformers import (
    AutoTokenizer,
    DebertaV2ForSequenceClassification,
    PretrainedConfig,
    PreTrainedModel,
)


class EnsembleDebertaConfig(PretrainedConfig):
    model_type = "ensemble-deberta"

    def __init__(self, num_models=3, **kwargs):
        super().__init__(**kwargs)
        self.num_models = num_models


class EnsembleDeberta(PreTrainedModel):
    config_class = EnsembleDebertaConfig
    base_model_prefix = "ensemble-deberta"

    def __init__(self, model_paths=None, config=None):
        # If no config is provided, initialize a default one
        if config is None:
            config = EnsembleDebertaConfig()
        super().__init__(config)

        if model_paths is None:
            raise ValueError("Please provide a list of model paths.")

        # Load each submodel into a ModuleList so its parameters are registered.
        # Additional kwargs (e.g. num_labels) can be passed to from_pretrained if needed.
        self.models = nn.ModuleList([
            DebertaV2ForSequenceClassification.from_pretrained(path)
            for path in model_paths
        ])

        # Keep the config in sync with the actual number of loaded models
        if len(self.models) != self.config.num_models:
            self.config.num_models = len(self.models)

        # Load the tokenizer from the first submodel (all submodels share the same tokenizer)
        self.tokenizer = AutoTokenizer.from_pretrained(model_paths[0])

    def forward(self, **kwargs):
        '''
        Forward pass that obtains the logits from each model, computes softmax
        probabilities and averages them (soft voting).
        '''
        # Collect softmax probabilities from each model
        probs = []
        for model in self.models:
            # Each model is assumed to return a ModelOutput with a 'logits' attribute
            outputs = model(**kwargs)
            logits = outputs.logits  # shape: (batch_size, num_labels)
            probs.append(torch.nn.functional.softmax(logits, dim=-1))

        # Stack and average the probabilities across models
        avg_probs = torch.stack(probs, dim=0).mean(dim=0)
        return avg_probs

    def save_pretrained(self, save_directory, **kwargs):
        '''
        Saves the ensemble model and its tokenizer.
        Each submodel is saved in its own subdirectory.
        '''
        os.makedirs(save_directory, exist_ok=True)

        # Save the ensemble configuration
        self.config.save_pretrained(save_directory)

        # Save each submodel to its own subdirectory, together with the tokenizer,
        # so that __init__ can reload the tokenizer from the first submodel.
        for idx, model in enumerate(self.models):
            sub_dir = os.path.join(save_directory, f"model_{idx}")
            model.save_pretrained(sub_dir)
            self.tokenizer.save_pretrained(sub_dir)

        # Save the tokenizer at the top level as well
        self.tokenizer.save_pretrained(save_directory)
        print(f"Ensemble saved to {save_directory}.")

    @classmethod
    def from_pretrained(cls, save_directory, **kwargs):
        '''
        Loads the ensemble model from a directory created by save_pretrained.
        '''
        # Load the ensemble configuration
        config = EnsembleDebertaConfig.from_pretrained(save_directory)
        num_models = config.num_models

        # Determine the paths for each submodel
        model_paths = [
            os.path.join(save_directory, f"model_{idx}")
            for idx in range(num_models)
        ]

        # Initialize the ensemble from the saved submodels
        ensemble = cls(model_paths=model_paths, config=config)
        return ensemble
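

# --- Usage sketch (illustrative, not from the original code) ---
# Assumes three fine-tuned DeBERTa-v3 sequence-classification checkpoints already
# exist on disk; the paths, example texts, and output directory below are placeholders.
if __name__ == "__main__":
    model_paths = [
        "./checkpoints/deberta_seed0",  # hypothetical checkpoint path
        "./checkpoints/deberta_seed1",  # hypothetical checkpoint path
        "./checkpoints/deberta_seed2",  # hypothetical checkpoint path
    ]
    ensemble = EnsembleDeberta(model_paths=model_paths)
    ensemble.eval()

    # Tokenize a small batch with the shared tokenizer and run soft voting
    inputs = ensemble.tokenizer(
        ["great movie", "terrible plot"],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        avg_probs = ensemble(**inputs)  # (batch_size, num_labels) averaged probabilities
    predictions = avg_probs.argmax(dim=-1)

    # Persist the whole ensemble and reload it later
    ensemble.save_pretrained("./ensemble-deberta")
    reloaded = EnsembleDeberta.from_pretrained("./ensemble-deberta")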