from dataclasses import dataclass
from typing import Tuple, Dict, Any, List, Optional

import numpy as np
from optuna import Trial, create_study, get_all_study_summaries, load_study
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from optuna.trial import TrialState

from core.constants import LEARNING_RATE, BATCH_SIZE_PER_REPLICA, ACTIVATION, OUT_ACTIVATION, \
    OPTIMIZER_TYPE, KERNEL_INITIALIZER, BIAS_INITIALIZER, N_TRIALS, LATENT_DIM, \
    INTERMEDIATE_DIMS, MAX_HIDDEN_LAYER_DIM, GLOBAL_CHECKPOINT_DIR
from core.model import VAEHandler
from utils.preprocess import preprocess


@dataclass
class HyperparameterTuner:
    """Tuner which looks for the best hyperparameters of the Variational Autoencoder specified in model.py.

    Currently supported hyperparameters are: dimension of the latent space, number of hidden layers, learning rate,
    activation function, activation function after the final layer, optimizer type, kernel initializer,
    bias initializer and batch size.

    Attributes:
        _discrete_parameters: A dictionary of hyperparameters taking discrete values in the range [low, high].
        _continuous_parameters: A dictionary of hyperparameters taking continuous values in the range [low, high].
        _categorical_parameters: A dictionary of hyperparameters taking values from a given list of choices.
        _storage: A string representing the URL to a database, required for distributed training.
        _study_name: A string, the name of the study.
    """
    _discrete_parameters: Dict[str, Tuple[int, int]]
    _continuous_parameters: Dict[str, Tuple[float, float]]
    _categorical_parameters: Dict[str, List[Any]]
    _storage: Optional[str] = None
    _study_name: Optional[str] = None

    def _check_hyperparameters(self):
        available_hyperparameters = ["latent_dim", "nb_hidden_layers", "learning_rate", "activation",
                                     "out_activation", "optimizer_type", "kernel_initializer", "bias_initializer",
                                     "batch_size_per_replica"]
        hyperparameters_to_be_optimized = list(self._discrete_parameters.keys()) + list(
            self._continuous_parameters.keys()) + list(self._categorical_parameters.keys())
        for hyperparameter_name in hyperparameters_to_be_optimized:
            if hyperparameter_name not in available_hyperparameters:
                raise Exception(f"Unknown hyperparameter: {hyperparameter_name}")

    def __post_init__(self):
        self._check_hyperparameters()
        self._energies_train, self._cond_e_train, self._cond_angle_train, self._cond_geo_train = preprocess()

        if self._storage is not None and self._study_name is not None:
            # Parallel optimization
            study_summaries = get_all_study_summaries(storage=self._storage)
            if any(self._study_name == study_summary.study_name for study_summary in study_summaries):
                # The study is already created in the database. Load it.
                self._study = load_study(study_name=self._study_name, storage=self._storage)
            else:
                # The study does not exist in the database. Create a new one.
                self._study = create_study(storage=self._storage, sampler=TPESampler(), pruner=MedianPruner(),
                                           study_name=self._study_name, direction="minimize")
        else:
            # Single optimization
            self._study = create_study(sampler=TPESampler(), pruner=MedianPruner(), direction="minimize")
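
    # A sketch of how the parallel mode above could be used (the storage URL and
    # study name are illustrative assumptions, not values from this module):
    # every process constructed with the same _storage and _study_name attaches
    # to one shared study, so trials are drawn from a common database, e.g.
    #
    #   HyperparameterTuner(..., _storage="sqlite:///optuna_vae.db",
    #                       _study_name="vae_tuning").tune()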

    def _create_model_handler(self, trial: Trial) -> VAEHandler:
        """Builds the model for a given trial.

        Optuna suggests parameters like dimensions of particular layers of the model, learning rate, optimizer, etc.

        Args:
            trial: Optuna's trial

        Returns:
            A Variational Autoencoder (VAE) handler
        """

        # Discrete parameters
        if "latent_dim" in self._discrete_parameters.keys():
            latent_dim = trial.suggest_int(name="latent_dim",
                                           low=self._discrete_parameters["latent_dim"][0],
                                           high=self._discrete_parameters["latent_dim"][1])
        else:
            latent_dim = LATENT_DIM

        if "nb_hidden_layers" in self._discrete_parameters.keys():
            nb_hidden_layers = trial.suggest_int(name="nb_hidden_layers",
                                                 low=self._discrete_parameters["nb_hidden_layers"][0],
                                                 high=self._discrete_parameters["nb_hidden_layers"][1])

            # Split the available layer widths into one contiguous range per hidden
            # layer, ordered from the widest down towards the latent space.
            all_possible = np.arange(start=latent_dim + 5, stop=MAX_HIDDEN_LAYER_DIM)
            chunks = np.array_split(all_possible, nb_hidden_layers)
            ranges = [(chunk[0], chunk[-1]) for chunk in chunks]
            ranges = reversed(ranges)

            # Casting from np.int to int makes the suggested values JSON serializable.
            intermediate_dims = [trial.suggest_int(name=f"intermediate_dim_{i}", low=int(low), high=int(high))
                                 for i, (low, high) in enumerate(ranges)]
        else:
            intermediate_dims = INTERMEDIATE_DIMS
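
        # A worked example of the range construction above, with illustrative
        # (assumed) values: for latent_dim = 10, MAX_HIDDEN_LAYER_DIM = 100 and
        # nb_hidden_layers = 3, np.arange yields the widths 15..99, np.array_split
        # produces the chunks [15..43], [44..71] and [72..99], and reversing gives
        # ranges = [(72, 99), (44, 71), (15, 43)], so the first hidden layer is
        # sampled widest and the layers narrow towards the latent space.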

        if "batch_size_per_replica" in self._discrete_parameters.keys():
            batch_size_per_replica = trial.suggest_int(name="batch_size_per_replica",
                                                       low=self._discrete_parameters["batch_size_per_replica"][0],
                                                       high=self._discrete_parameters["batch_size_per_replica"][1])
        else:
            batch_size_per_replica = BATCH_SIZE_PER_REPLICA

        # Continuous parameters
        if "learning_rate" in self._continuous_parameters.keys():
            learning_rate = trial.suggest_float(name="learning_rate",
                                                low=self._continuous_parameters["learning_rate"][0],
                                                high=self._continuous_parameters["learning_rate"][1])
        else:
            learning_rate = LEARNING_RATE

        # Categorical parameters
        if "activation" in self._categorical_parameters.keys():
            activation = trial.suggest_categorical(name="activation",
                                                   choices=self._categorical_parameters["activation"])
        else:
            activation = ACTIVATION

        if "out_activation" in self._categorical_parameters.keys():
            out_activation = trial.suggest_categorical(name="out_activation",
                                                       choices=self._categorical_parameters["out_activation"])
        else:
            out_activation = OUT_ACTIVATION

        if "optimizer_type" in self._categorical_parameters.keys():
            optimizer_type = trial.suggest_categorical(name="optimizer_type",
                                                       choices=self._categorical_parameters["optimizer_type"])
        else:
            optimizer_type = OPTIMIZER_TYPE

        if "kernel_initializer" in self._categorical_parameters.keys():
            kernel_initializer = trial.suggest_categorical(name="kernel_initializer",
                                                           choices=self._categorical_parameters["kernel_initializer"])
        else:
            kernel_initializer = KERNEL_INITIALIZER

        if "bias_initializer" in self._categorical_parameters.keys():
            bias_initializer = trial.suggest_categorical(name="bias_initializer",
                                                         choices=self._categorical_parameters["bias_initializer"])
        else:
            bias_initializer = BIAS_INITIALIZER

        checkpoint_dir = f"{GLOBAL_CHECKPOINT_DIR}/{self._study_name}/trial_{trial.number:03d}"

        return VAEHandler(_wandb_project_name=self._study_name,
                          _wandb_tags=["hyperparameter tuning", f"trial {trial.number}"],
                          _batch_size_per_replica=batch_size_per_replica,
                          _intermediate_dims=intermediate_dims,
                          latent_dim=latent_dim,
                          _learning_rate=learning_rate,
                          _activation=activation,
                          _out_activation=out_activation,
                          _optimizer_type=optimizer_type,
                          _kernel_initializer=kernel_initializer,
                          _bias_initializer=bias_initializer,
                          _checkpoint_dir=checkpoint_dir,
                          _early_stop=True,
                          _save_model_every_epoch=False,
                          _save_best_model=True,
                          )

    def _objective(self, trial: Trial) -> float:
        """For a given trial trains the model and returns the average validation loss.

        Args:
            trial: Optuna's trial

        Returns:
            A single float, the validation loss. It is either an average over the k trainings performed in
            cross-validation mode, or a single number obtained from validation on a previously unseen fraction
            of the dataset.
        """

        # Generate the trial model.
        model_handler = self._create_model_handler(trial)

        # Train the model.
        verbose = True
        histories = model_handler.train(self._energies_train, self._cond_e_train, self._cond_angle_train,
                                        self._cond_geo_train, verbose)

        # Return the validation loss (currently treated as the objective). Notice that we take
        # the best model according to the validation loss.
        final_validation_losses = [np.min(history.history["val_loss"]) for history in histories]
        avg_validation_loss = np.mean(final_validation_losses).item()
        return avg_validation_loss

    def tune(self) -> None:
        """Main tuning function.

        Based on a given study, tunes the model and prints detailed information about the best trial (value of the
        objective function and adjusted parameters).
        """

        self._study.optimize(func=self._objective, n_trials=N_TRIALS, gc_after_trial=True)
        pruned_trials = self._study.get_trials(deepcopy=False, states=(TrialState.PRUNED,))
        complete_trials = self._study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,))
        print("Study statistics: ")
        print("  Number of finished trials: ", len(self._study.trials))
        print("  Number of pruned trials: ", len(pruned_trials))
        print("  Number of complete trials: ", len(complete_trials))

        print("Best trial:")
        trial = self._study.best_trial

        print("  Value: ", trial.value)

        print("  Params: ")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")
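

# Example usage: a minimal sketch, not part of the original module. The
# hyperparameter names follow available_hyperparameters above; the concrete
# ranges and choices are illustrative assumptions.
if __name__ == "__main__":
    tuner = HyperparameterTuner(
        _discrete_parameters={"latent_dim": (5, 20), "nb_hidden_layers": (2, 4)},
        _continuous_parameters={"learning_rate": (1e-4, 1e-2)},
        _categorical_parameters={"optimizer_type": ["Adam", "RMSprop"]},
        # Leaving _storage and _study_name as None selects single-process mode.
    )
    tuner.tune()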