from dataclasses import dataclass
from typing import Tuple, Dict, Any, List

import numpy as np
from optuna import Trial, create_study, get_all_study_summaries, load_study
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from optuna.trial import TrialState

from core.constants import LEARNING_RATE, BATCH_SIZE_PER_REPLICA, ACTIVATION, OUT_ACTIVATION, \
    OPTIMIZER_TYPE, KERNEL_INITIALIZER, BIAS_INITIALIZER, N_TRIALS, LATENT_DIM, \
    INTERMEDIATE_DIMS, MAX_HIDDEN_LAYER_DIM, GLOBAL_CHECKPOINT_DIR
from core.model import VAEHandler
from utils.preprocess import preprocess


@dataclass
class HyperparameterTuner:
    """Tuner which looks for the best hyperparameters of the Variational Autoencoder specified in model.py.

    Currently supported hyperparameters are: dimension of the latent space, number of hidden layers, learning rate,
    activation function, activation function after the final layer, optimizer type, kernel initializer,
    bias initializer and batch size.

    Attributes:
        _discrete_parameters: A dictionary of hyperparameters taking discrete values in the range [low, high].
        _continuous_parameters: A dictionary of hyperparameters taking continuous values in the range [low, high].
        _categorical_parameters: A dictionary of hyperparameters taking values from a specified list of choices.
        _storage: A string with the URL of the database required for distributed training.
        _study_name: A string, the name of the study.

    """
    _discrete_parameters: Dict[str, Tuple[int, int]]
    _continuous_parameters: Dict[str, Tuple[float, float]]
    _categorical_parameters: Dict[str, List[Any]]
    _storage: str = None
    _study_name: str = None

    def _check_hyperparameters(self):
        available_hyperparameters = ["latent_dim", "nb_hidden_layers", "learning_rate", "activation", "out_activation",
                                     "optimizer_type", "kernel_initializer", "bias_initializer",
                                     "batch_size_per_replica"]
        hyperparameters_to_be_optimized = list(self._discrete_parameters.keys()) + list(
            self._continuous_parameters.keys()) + list(self._categorical_parameters.keys())
        for hyperparameter_name in hyperparameters_to_be_optimized:
            if hyperparameter_name not in available_hyperparameters:
                raise Exception(f"Unknown hyperparameter: {hyperparameter_name}")

    def __post_init__(self):
        self._check_hyperparameters()
        self._energies_train, self._cond_e_train, self._cond_angle_train, self._cond_geo_train = preprocess()

        if self._storage is not None and self._study_name is not None:
            # Parallel optimization
            study_summaries = get_all_study_summaries(self._storage)
            if any(self._study_name == study_summary.study_name for study_summary in study_summaries):
                # The study already exists in the database. Load it.
                self._study = load_study(study_name=self._study_name, storage=self._storage)
            else:
                # The study does not exist in the database. Create a new one.
                self._study = create_study(storage=self._storage, sampler=TPESampler(), pruner=MedianPruner(),
                                           study_name=self._study_name, direction="minimize")
        else:
            # Single optimization
            self._study = create_study(sampler=TPESampler(), pruner=MedianPruner(), direction="minimize")

    def _create_model_handler(self, trial: Trial) -> VAEHandler:
        """For a given trial, builds the model.

        Optuna suggests parameters like dimensions of particular layers of the model, learning rate, optimizer, etc.

        Args:
            trial: Optuna's trial

        Returns:
            Variational Autoencoder (VAE)
        """

        # Discrete parameters
        if "latent_dim" in self._discrete_parameters.keys():
            latent_dim = trial.suggest_int(name="latent_dim",
                                           low=self._discrete_parameters["latent_dim"][0],
                                           high=self._discrete_parameters["latent_dim"][1])
        else:
            latent_dim = LATENT_DIM

        if "nb_hidden_layers" in self._discrete_parameters.keys():
            nb_hidden_layers = trial.suggest_int(name="nb_hidden_layers",
                                                 low=self._discrete_parameters["nb_hidden_layers"][0],
                                                 high=self._discrete_parameters["nb_hidden_layers"][1])

            all_possible = np.arange(start=latent_dim + 5, stop=MAX_HIDDEN_LAYER_DIM)
            chunks = np.array_split(all_possible, nb_hidden_layers)
            ranges = [(chunk[0], chunk[-1]) for chunk in chunks]
            ranges = reversed(ranges)

            # Casting from a NumPy integer to a Python int makes the values JSON serializable.
            intermediate_dims = [trial.suggest_int(name=f"intermediate_dim_{i}", low=int(low), high=int(high))
                                 for i, (low, high) in enumerate(ranges)]
        else:
            intermediate_dims = INTERMEDIATE_DIMS

        if "batch_size_per_replica" in self._discrete_parameters.keys():
            batch_size_per_replica = trial.suggest_int(name="batch_size_per_replica",
                                                       low=self._discrete_parameters["batch_size_per_replica"][0],
                                                       high=self._discrete_parameters["batch_size_per_replica"][1])
        else:
            batch_size_per_replica = BATCH_SIZE_PER_REPLICA

        # Continuous parameters
        if "learning_rate" in self._continuous_parameters.keys():
            learning_rate = trial.suggest_float(name="learning_rate",
                                                low=self._continuous_parameters["learning_rate"][0],
                                                high=self._continuous_parameters["learning_rate"][1])
        else:
            learning_rate = LEARNING_RATE

        # Categorical parameters
        if "activation" in self._categorical_parameters.keys():
            activation = trial.suggest_categorical(name="activation",
                                                   choices=self._categorical_parameters["activation"])
        else:
            activation = ACTIVATION

        if "out_activation" in self._categorical_parameters.keys():
            out_activation = trial.suggest_categorical(name="out_activation",
                                                       choices=self._categorical_parameters["out_activation"])
        else:
            out_activation = OUT_ACTIVATION

        if "optimizer_type" in self._categorical_parameters.keys():
            optimizer_type = trial.suggest_categorical(name="optimizer_type",
                                                       choices=self._categorical_parameters["optimizer_type"])
        else:
            optimizer_type = OPTIMIZER_TYPE

        if "kernel_initializer" in self._categorical_parameters.keys():
            kernel_initializer = trial.suggest_categorical(name="kernel_initializer",
                                                           choices=self._categorical_parameters["kernel_initializer"])
        else:
            kernel_initializer = KERNEL_INITIALIZER

        if "bias_initializer" in self._categorical_parameters.keys():
            bias_initializer = trial.suggest_categorical(name="bias_initializer",
                                                         choices=self._categorical_parameters["bias_initializer"])
        else:
            bias_initializer = BIAS_INITIALIZER

        checkpoint_dir = f"{GLOBAL_CHECKPOINT_DIR}/{self._study_name}/trial_{trial.number:03d}"

        return VAEHandler(_wandb_project_name=self._study_name,
                          _wandb_tags=["hyperparameter tuning", f"trial {trial.number}"],
                          _batch_size_per_replica=batch_size_per_replica,
                          _intermediate_dims=intermediate_dims,
                          latent_dim=latent_dim,
                          _learning_rate=learning_rate,
                          _activation=activation,
                          _out_activation=out_activation,
                          _optimizer_type=optimizer_type,
                          _kernel_initializer=kernel_initializer,
                          _bias_initializer=bias_initializer,
                          _checkpoint_dir=checkpoint_dir,
                          _early_stop=True,
                          _save_model_every_epoch=False,
                          _save_best_model=True,
                          )

    def _objective(self, trial: Trial) -> float:
        """For a given trial, trains the model and returns the average validation loss.

        Args:
            trial: Optuna's trial

        Returns:
            A single float which is the validation loss. It is either the average of k trainings performed in
            cross-validation mode, or a single value obtained from validation on a previously unseen fraction of
            the dataset.
        """

        # Generate the trial model.
        model_handler = self._create_model_handler(trial)

        # Train the model.
        verbose = True
        histories = model_handler.train(self._energies_train, self._cond_e_train, self._cond_angle_train,
                                        self._cond_geo_train, verbose)

        # Return the validation loss (currently treated as the objective). Note that for each training we take the
        # best epoch according to the validation loss.
        final_validation_losses = [np.min(history.history["val_loss"]) for history in histories]
        avg_validation_loss = np.mean(final_validation_losses).item()
        return avg_validation_loss

    def tune(self) -> None:
        """Main tuning function.

        Based on the given study, tunes the model and prints detailed information about the best trial (value of the
        objective function and adjusted parameters).
        """

        self._study.optimize(func=self._objective, n_trials=N_TRIALS, gc_after_trial=True)
        pruned_trials = self._study.get_trials(deepcopy=False, states=(TrialState.PRUNED,))
        complete_trials = self._study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
        print("Study statistics: ")
        print("  Number of finished trials: ", len(self._study.trials))
        print("  Number of pruned trials: ", len(pruned_trials))
        print("  Number of complete trials: ", len(complete_trials))

        print("Best trial:")
        trial = self._study.best_trial

        print("  Value: ", trial.value)

        print("  Params: ")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")
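

# Example usage (an illustrative sketch only; the dictionary keys must match the names listed in
# _check_hyperparameters, while the ranges and choices below are hypothetical and not taken from
# the project configuration):
#
#     tuner = HyperparameterTuner(
#         _discrete_parameters={"latent_dim": (5, 20), "nb_hidden_layers": (2, 4)},
#         _continuous_parameters={"learning_rate": (1e-4, 1e-2)},
#         _categorical_parameters={"optimizer_type": ["Adam", "RMSprop"]},
#     )
#     tuner.tune()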