
VIPRSGrid

Bases: VIPRS

A class to fit the VIPRS model to data using a grid of hyperparameters. Instead of having a single set of hyperparameters, we simultaneously fit multiple models with different hyperparameters and compare their performance at the end. This class is generic and does not support any model selection or averaging schemes.

The class inherits all the basic attributes from the VIPRS class.

Attributes:

grid_table: A pandas table containing the hyperparameters for each model.
n_models: The number of models to fit.
shapes: A dictionary containing the shapes of the data matrices.
active_models: A boolean array indicating which models are still active (i.e. not converged).
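
A minimal usage sketch is given below. It assumes a GWADataLoader object (here called gdl) with LD matrices and GWAS summary statistics already attached, and a HyperparameterGrid with more than one configuration (the constructor asserts n_models > 1); the import path for HyperparameterGrid and the way the grid is populated are assumptions based on the package layout and may differ across versions.

from viprs.model.gridsearch.VIPRSGrid import VIPRSGrid
from viprs.model.gridsearch.HyperparameterGrid import HyperparameterGrid  # path assumed

# `gdl` is assumed to be a magenpy.GWADataLoader with harmonized LD and
# summary statistics (construction not shown here).
grid = HyperparameterGrid()   # assumed to populate a default grid; some versions
                              # may require calling its generate_*_grid methods.

model = VIPRSGrid(gdl, grid)  # one VIPRS model per row of grid.to_table()
model.fit()                   # fit all grid models simultaneously

# Per-model objective values and convergence status:
print(model.to_validation_table())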

Source code in viprs/model/gridsearch/VIPRSGrid.py
class VIPRSGrid(VIPRS):
    """
    A class to fit the `VIPRS` model to data using a grid of hyperparameters.
    Instead of having a single set of hyperparameters, we simultaneously fit
    multiple models with different hyperparameters and compare their performance
    at the end. This class is generic and does not support any model selection or
    averaging schemes.

    The class inherits all the basic attributes from the [VIPRS][viprs.model.VIPRS.VIPRS] class.

    !!! seealso "See Also"
        * [VIPRSGridSearch][viprs.model.gridsearch.VIPRSGridSearch.VIPRSGridSearch]
        * [VIPRSBMA][viprs.model.gridsearch.VIPRSBMA.VIPRSBMA]

    :ivar grid_table: A pandas table containing the hyperparameters for each model.
    :ivar n_models: The number of models to fit.
    :ivar shapes: A dictionary containing the shapes of the data matrices.
    :ivar active_models: A boolean array indicating which models are still active (i.e. not converged).

    """

    def __init__(self,
                 gdl,
                 grid,
                 **kwargs):
        """
        Initialize the `VIPRS` model with a grid of hyperparameters.

        :param gdl: An instance of `GWADataLoader`
        :param grid: An instance of `HyperparameterGrid`
        :param kwargs: Additional keyword arguments to pass to the parent `VIPRS` class.
        """

        self.grid_table = grid.to_table()
        self.n_models = len(self.grid_table)
        assert self.n_models > 1

        grid_params = {c: self.grid_table[c].values for c in self.grid_table.columns}

        if 'fix_params' not in kwargs:
            kwargs['fix_params'] = grid_params
        else:
            kwargs['fix_params'].update(grid_params)

        # Make sure that the matrices are in Fortran order:
        kwargs['order'] = 'F'

        super().__init__(gdl, **kwargs)

        self.shapes = {c: (shp, self.n_models)
                       for c, shp in self.shapes.items()}
        self.active_models = None
        self.Nj = {c: Nj[:, None].astype(self.float_precision, order=self.order) for c, Nj in self.Nj.items()}
        self.optim_results = [OptimizeResult() for _ in range(self.n_models)]

    @property
    def models_to_keep(self):
        """
        :return: A boolean array indicating which models have converged successfully.
        """
        return np.logical_or(self.active_models, self.converged_models)

    @property
    def converged_models(self):
        return np.array([optr.success for optr in self.optim_results])

    def initialize_theta(self, theta_0=None):
        """
        Initialize the global hyperparameters of the model.
        :param theta_0: A dictionary of initial values for the hyperparameters theta
        """

        self.active_models = np.array([True for _ in range(self.n_models)])

        super().initialize_theta(theta_0=theta_0)

        try:
            if self.pi.shape != (self.n_models, ):
                self.pi *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
        except AttributeError:
            self.pi *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

        try:
            if self.tau_beta.shape != (self.n_models, ):
                self.tau_beta *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
        except AttributeError:
            self.tau_beta *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

        try:
            if self.sigma_epsilon.shape != (self.n_models, ):
                self.sigma_epsilon *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
        except AttributeError:
            self.sigma_epsilon *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

        try:
            if self._sigma_g.shape != (self.n_models, ):
                self._sigma_g *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
        except AttributeError:
            self._sigma_g *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

    def init_optim_meta(self):
        """
        Initialize the various quantities/objects to keep track of the optimization process.
         This method initializes the "history" object (which keeps track of the objective + other
         hyperparameters requested by the user), in addition to the OptimizeResult objects.
        """
        super().init_optim_meta()

        # Reset the OptimizeResult objects:
        for optr in self.optim_results:
            optr.reset()

    def e_step(self):
        """
        Run the E-Step of the Variational EM algorithm.
        Here, we update the variational parameters for each variant using coordinate
        ascent optimization techniques. The coordinate ascent procedure is run on all the models
        in the grid simultaneously. The update equations are outlined in
        the Supplementary Material of the following paper:

        > Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference.
        Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009.
        Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
        """

        active_model_idx = np.where(self.active_models)[0].astype(np.int32)

        for c, shapes in self.shapes.items():

            # Get the priors:
            tau_beta = self.get_tau_beta(c)
            pi = self.get_pi(c)

            # Updates for tau variational parameters:
            # NOTE: Here, we compute the variational sigma in-place to avoid the need
            # to change the order of the resulting matrix or its float precision:
            np.add(self.Nj[c] / self.sigma_epsilon, tau_beta,
                   out=self.var_tau[c])
            np.log(self.var_tau[c], out=self._log_var_tau[c])

            # Compute some quantities that are needed for the per-SNP updates:
            mu_mult = self.Nj[c] / (self.var_tau[c] * self.sigma_epsilon)
            u_logs = np.log(pi) - np.log(1. - pi) + .5 * (np.log(tau_beta) - self._log_var_tau[c])

            if self.use_cpp:
                cpp_e_step_grid(self.ld_left_bound[c],
                                self.ld_indptr[c],
                                self.ld_data[c],
                                self.std_beta[c],
                                self.var_gamma[c],
                                self.var_mu[c],
                                self.eta[c],
                                self.q[c],
                                self.eta_diff[c],
                                u_logs,
                                0.5 * self.var_tau[c],
                                mu_mult,
                                self.dequantize_scale,
                                active_model_idx,
                                self.threads,
                                self.use_blas,
                                self.low_memory)
            else:

                e_step_grid(self.ld_left_bound[c],
                            self.ld_indptr[c],
                            self.ld_data[c],
                            self.std_beta[c],
                            self.var_gamma[c],
                            self.var_mu[c],
                            self.eta[c],
                            self.q[c],
                            self.eta_diff[c],
                            u_logs,
                            0.5 * self.var_tau[c],
                            mu_mult,
                            active_model_idx,
                            self.threads,
                            self.use_blas,
                            self.low_memory)

        self.zeta = self.compute_zeta()

    def to_theta_table(self):
        """
        :return: A `pandas` DataFrame containing information about the hyperparameters of the model.
        """

        if self.n_models == 1:
            return super(VIPRSGrid, self).to_theta_table()

        sig_e = self.sigma_epsilon
        h2 = self.get_heritability()
        pi = self.get_proportion_causal()

        if isinstance(self.tau_beta, dict):
            taus = dict_mean(self.tau_beta, axis=0)
        else:
            taus = self.tau_beta

        theta_table = []

        for m in range(self.n_models):

            theta_table += [
                {'Model': m, 'Parameter': 'Residual_variance', 'Value': sig_e[m]},
                {'Model': m, 'Parameter': 'Heritability', 'Value': h2[m]},
                {'Model': m, 'Parameter': 'Proportion_causal', 'Value': pi[m]},
                {'Model': m, 'Parameter': 'sigma_beta', 'Value': taus[m]}
            ]

        return pd.DataFrame(theta_table)

    def to_validation_table(self):
        """
        :return: The validation table summarizing the performance of each model.
        :raises ValueError: if the validation result is not set.
        """
        if self.validation_result is None:
            raise ValueError("Validation result is not set!")
        elif len(self.validation_result) < 1:
            raise ValueError("Validation result is not set!")

        return pd.DataFrame(self.validation_result)

    def write_validation_result(self, v_filename, sep="\t"):
        """
        After performing hyperparameter search, write a table
        that records the value of the objective for each combination
        of hyperparameters.
        :param v_filename: The filename for the validation table.
        :param sep: The separator for the validation table
        """

        v_df = self.to_validation_table()
        v_df.to_csv(v_filename, index=False, sep=sep)

    def fit(self,
            max_iter=1000,
            theta_0=None,
            param_0=None,
            continued=False,
            min_iter=3,
            f_abs_tol=1e-6,
            x_abs_tol=1e-7,
            drop_r_tol=1e-6,
            patience=5):
        """
        A convenience method to fit all the models in the grid using the Variational EM algorithm.

        :param max_iter: Maximum number of iterations. 
        :param theta_0: A dictionary of values to initialize the hyperparameters
        :param param_0: A dictionary of values to initialize the variational parameters
        :param continued: If true, continue the model fitting for more iterations.
        :param min_iter: The minimum number of iterations to run before checking for convergence.
        :param f_abs_tol: The absolute tolerance threshold for the objective (ELBO).
        :param x_abs_tol: The absolute tolerance threshold for the variational parameters.
        :param drop_r_tol: The relative tolerance for the drop in the ELBO to be considered as a red flag. It usually
        happens around convergence that the objective fluctuates due to numerical errors. This is a way to
        differentiate such random fluctuations from actual drops in the objective.
        :param patience: The maximum number of times the objective is allowed to drop before termination.
        """

        if not continued:
            self.initialize(theta_0, param_0)
            start_idx = 1
        else:
            start_idx = len(self.history['ELBO']) + 1
            for i, optr in enumerate(self.optim_results):
                self.active_models[i] = True
                optr.update(self.history['ELBO'][i], increment=False)

        patience = patience*np.ones(self.n_models)

        logging.debug("> Performing model fit...")
        if self.threads > 1:
            logging.debug(f"> Using up to {self.threads} threads.")

        # If the model is fit over a single chromosome, append this information to the
        # tqdm progress bar:
        if len(self.shapes) == 1:
            chrom, shape = list(self.shapes.items())[0]
            desc = f"Chromosome {chrom} ({shape[0]} variants)"
        else:
            desc = None

        # Progress bar:
        pbar = tqdm(range(start_idx, start_idx + max_iter),
                    disable=not self.verbose,
                    desc=desc)

        for i in pbar:

            if all([optr.stop_iteration for optr in self.optim_results]):

                # If converged, update the progress bar before exiting:
                pbar.set_postfix({
                    'Best ELBO': f"{self.history['ELBO'][-1][self.models_to_keep].max():.4f}",
                    'Models converged': f"{self.n_models - np.sum(self.active_models)}/{self.n_models}"
                })
                pbar.n = i - 1
                pbar.total = i - 1
                pbar.refresh()
                pbar.close()

                pbar.close()
                break

            self.update_theta_history()

            self.e_step()
            self.m_step()

            self.history['ELBO'].append(self.elbo(sum_axis=0))
            h2 = self.get_heritability()

            if i > 1:

                # Get the current and previous ELBO values
                curr_elbo = self.history['ELBO'][-1]
                prev_elbo = self.history['ELBO'][-2]

                for m in np.where(self.active_models)[0]:

                    if (i > min_iter) and np.isclose(prev_elbo[m], curr_elbo[m], atol=f_abs_tol, rtol=0.):
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=True,
                                                     message='Objective (ELBO) converged successfully.')
                    elif (i > min_iter) and max([np.max(np.abs(diff[:, m]))
                                               for diff in self.eta_diff.values()]) < x_abs_tol:
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=True,
                                                     message='Variational parameters converged successfully.')

                    # Check to see if the objective drops due to numerical instabilities:
                    elif curr_elbo[m] < prev_elbo[m] and not np.isclose(curr_elbo[m],
                                                                        prev_elbo[m],
                                                                        atol=0.,
                                                                        rtol=drop_r_tol):
                        patience[m] -= 1

                        if patience[m] == 0:
                            self.active_models[m] = False
                            self.optim_results[m].update(curr_elbo[m],
                                                         stop_iteration=True,
                                                         success=False,
                                                         message='Optimization is halted '
                                                                 'due to numerical instabilities.')
                        else:
                            self.optim_results[m].update(curr_elbo)

                    # Check if the model parameters behave in unexpected/pathological ways:
                    elif np.isnan(curr_elbo[m]):
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=False,
                                                     message='The objective (ELBO) is NaN.')
                    elif self.sigma_epsilon[m] <= 0.:
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=False,
                                                     message='Optimization is halted (sigma_epsilon <= 0).')
                    elif h2[m] >= 1.:
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=False,
                                                     message='Optimization is halted (h2 >= 1).')
                    else:
                        self.optim_results[m].update(curr_elbo)

                # -----------------------------------------------------------------------

            if self.models_to_keep.sum() > 0:
                pbar.set_postfix({
                    'Best ELBO': f"{self.history['ELBO'][-1][self.models_to_keep].max():.4f}",
                    'Models converged': f"{self.n_models - np.sum(self.active_models)}/{self.n_models}"
                })

        # Update posterior moments:
        self.update_posterior_moments()

        # Inspect the optimization results:
        for m, optr in enumerate(self.optim_results):
            if not optr.stop_iteration:
                self.active_models[m] = False
                optr.update(self.history['ELBO'][-1][m],
                            stop_iteration=True,
                            success=False,
                            message="Maximum iterations reached without convergence.\n"
                                    "You may need to run the model for more iterations.")

        # Inform the user about potential issues:
        if int(self.verbose) > 1:

            if self.models_to_keep.sum() > 0:
                logging.info(f"> Optimization is complete for all {self.n_models} models.")
                logging.info(f"> {np.sum(self.models_to_keep)} model(s) converged successfully.")
            else:
                logging.error("> All models failed to converge. Please check the optimization results.")

        self.validation_result = self.grid_table.copy()
        self.validation_result['ELBO'] = [optr.fun for optr in self.optim_results]
        self.validation_result['Converged'] = self.models_to_keep
        self.validation_result['Optimization_message'] = [optr.message for optr in self.optim_results]

        return self

models_to_keep property

Returns: A boolean array indicating which models have converged successfully.
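
For example, the mask can be used to restrict the validation table (described below) to converged models; a hedged sketch:

import numpy as np

v_df = model.to_validation_table()
converged = v_df[np.asarray(model.models_to_keep)]
# Row of the converged model with the highest ELBO:
print(converged.loc[converged['ELBO'].idxmax()])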

__init__(gdl, grid, **kwargs)

Initialize the VIPRS model with a grid of hyperparameters.

Parameters:

gdl: An instance of GWADataLoader (required).
grid: An instance of HyperparameterGrid (required).
kwargs: Additional keyword arguments to pass to the parent VIPRS class (default: {}).
Source code in viprs/model/gridsearch/VIPRSGrid.py
def __init__(self,
             gdl,
             grid,
             **kwargs):
    """
    Initialize the `VIPRS` model with a grid of hyperparameters.

    :param gdl: An instance of `GWADataLoader`
    :param grid: An instance of `HyperparameterGrid`
    :param kwargs: Additional keyword arguments to pass to the parent `VIPRS` class.
    """

    self.grid_table = grid.to_table()
    self.n_models = len(self.grid_table)
    assert self.n_models > 1

    grid_params = {c: self.grid_table[c].values for c in self.grid_table.columns}

    if 'fix_params' not in kwargs:
        kwargs['fix_params'] = grid_params
    else:
        kwargs['fix_params'].update(grid_params)

    # Make sure that the matrices are in Fortran order:
    kwargs['order'] = 'F'

    super().__init__(gdl, **kwargs)

    self.shapes = {c: (shp, self.n_models)
                   for c, shp in self.shapes.items()}
    self.active_models = None
    self.Nj = {c: Nj[:, None].astype(self.float_precision, order=self.order) for c, Nj in self.Nj.items()}
    self.optim_results = [OptimizeResult() for _ in range(self.n_models)]

e_step()

Run the E-Step of the Variational EM algorithm. Here, we update the variational parameters for each variant using coordinate ascent optimization techniques. The coordinate ascent procedure is run on all the models in the grid simultaneously. The update equations are outlined in the Supplementary Material of the following paper:

Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference. Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009. Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
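
The sketch below illustrates, in plain NumPy, the form of these updates for a single variant j across all models in the grid at once (each input except the marginal effect is an array of shape (n_models,)). It is a simplification of the compiled e_step_grid kernels shown below: the residualized effect residual_j (the LD-weighted sum of eta over variant j's neighbors, excluding j itself) is assumed to be provided by the caller.

import numpy as np

def grid_update_for_variant(std_beta_j, residual_j, N_j, sigma_epsilon, tau_beta, pi):
    """Illustrative per-variant coordinate ascent update across grid models."""
    # Variational precision of the effect size (var_tau in the code below):
    var_tau_j = N_j / sigma_epsilon + tau_beta
    # Variational mean, given the residualized marginal effect:
    var_mu_j = (N_j / (var_tau_j * sigma_epsilon)) * (std_beta_j - residual_j)
    # Posterior inclusion probability (var_gamma), via a logistic transform:
    u_j = (np.log(pi) - np.log(1. - pi)
           + 0.5 * (np.log(tau_beta) - np.log(var_tau_j))
           + 0.5 * var_tau_j * var_mu_j ** 2)
    var_gamma_j = 1. / (1. + np.exp(-u_j))
    return var_tau_j, var_mu_j, var_gamma_j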

Source code in viprs/model/gridsearch/VIPRSGrid.py
def e_step(self):
    """
    Run the E-Step of the Variational EM algorithm.
    Here, we update the variational parameters for each variant using coordinate
    ascent optimization techniques. The coordinate ascent procedure is run on all the models
    in the grid simultaneously. The update equations are outlined in
    the Supplementary Material of the following paper:

    > Zabad S, Gravel S, Li Y. Fast and accurate Bayesian polygenic risk modeling with variational inference.
    Am J Hum Genet. 2023 May 4;110(5):741-761. doi: 10.1016/j.ajhg.2023.03.009.
    Epub 2023 Apr 7. PMID: 37030289; PMCID: PMC10183379.
    """

    active_model_idx = np.where(self.active_models)[0].astype(np.int32)

    for c, shapes in self.shapes.items():

        # Get the priors:
        tau_beta = self.get_tau_beta(c)
        pi = self.get_pi(c)

        # Updates for tau variational parameters:
        # NOTE: Here, we compute the variational sigma in-place to avoid the need
        # to change the order of the resulting matrix or its float precision:
        np.add(self.Nj[c] / self.sigma_epsilon, tau_beta,
               out=self.var_tau[c])
        np.log(self.var_tau[c], out=self._log_var_tau[c])

        # Compute some quantities that are needed for the per-SNP updates:
        mu_mult = self.Nj[c] / (self.var_tau[c] * self.sigma_epsilon)
        u_logs = np.log(pi) - np.log(1. - pi) + .5 * (np.log(tau_beta) - self._log_var_tau[c])

        if self.use_cpp:
            cpp_e_step_grid(self.ld_left_bound[c],
                            self.ld_indptr[c],
                            self.ld_data[c],
                            self.std_beta[c],
                            self.var_gamma[c],
                            self.var_mu[c],
                            self.eta[c],
                            self.q[c],
                            self.eta_diff[c],
                            u_logs,
                            0.5 * self.var_tau[c],
                            mu_mult,
                            self.dequantize_scale,
                            active_model_idx,
                            self.threads,
                            self.use_blas,
                            self.low_memory)
        else:

            e_step_grid(self.ld_left_bound[c],
                        self.ld_indptr[c],
                        self.ld_data[c],
                        self.std_beta[c],
                        self.var_gamma[c],
                        self.var_mu[c],
                        self.eta[c],
                        self.q[c],
                        self.eta_diff[c],
                        u_logs,
                        0.5 * self.var_tau[c],
                        mu_mult,
                        active_model_idx,
                        self.threads,
                        self.use_blas,
                        self.low_memory)

    self.zeta = self.compute_zeta()

fit(max_iter=1000, theta_0=None, param_0=None, continued=False, min_iter=3, f_abs_tol=1e-06, x_abs_tol=1e-07, drop_r_tol=1e-06, patience=5)

A convenience method to fit all the models in the grid using the Variational EM algorithm.

Parameters:

max_iter: Maximum number of iterations (default: 1000).
theta_0: A dictionary of values to initialize the hyperparameters (default: None).
param_0: A dictionary of values to initialize the variational parameters (default: None).
continued: If True, continue the model fitting for more iterations (default: False).
min_iter: The minimum number of iterations to run before checking for convergence (default: 3).
f_abs_tol: The absolute tolerance threshold for the objective (ELBO) (default: 1e-06).
x_abs_tol: The absolute tolerance threshold for the variational parameters (default: 1e-07).
drop_r_tol: The relative tolerance for a drop in the ELBO to be flagged as problematic. Around convergence, the objective can fluctuate due to numerical errors; this threshold distinguishes such random fluctuations from genuine drops in the objective (default: 1e-06).
patience: The maximum number of times the objective is allowed to drop before termination (default: 5).
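
A hedged usage sketch showing an initial fit followed by a continued run for models that reached the iteration cap (iteration budgets here are illustrative):

model.fit(max_iter=500)

# Continue fitting from the current state if some models did not converge:
if not all(model.models_to_keep):
    model.fit(continued=True, max_iter=500)

print(model.to_validation_table()[['ELBO', 'Converged']])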
Source code in viprs/model/gridsearch/VIPRSGrid.py
def fit(self,
        max_iter=1000,
        theta_0=None,
        param_0=None,
        continued=False,
        min_iter=3,
        f_abs_tol=1e-6,
        x_abs_tol=1e-7,
        drop_r_tol=1e-6,
        patience=5):
    """
    A convenience method to fit all the models in the grid using the Variational EM algorithm.

    :param max_iter: Maximum number of iterations. 
    :param theta_0: A dictionary of values to initialize the hyperparameters
    :param param_0: A dictionary of values to initialize the variational parameters
    :param continued: If true, continue the model fitting for more iterations.
    :param min_iter: The minimum number of iterations to run before checking for convergence.
    :param f_abs_tol: The absolute tolerance threshold for the objective (ELBO).
    :param x_abs_tol: The absolute tolerance threshold for the variational parameters.
    :param drop_r_tol: The relative tolerance for the drop in the ELBO to be considered as a red flag. It usually
    happens around convergence that the objective fluctuates due to numerical errors. This is a way to
    differentiate such random fluctuations from actual drops in the objective.
    :param patience: The maximum number of times the objective is allowed to drop before termination.
    """

    if not continued:
        self.initialize(theta_0, param_0)
        start_idx = 1
    else:
        start_idx = len(self.history['ELBO']) + 1
        for i, optr in enumerate(self.optim_results):
            self.active_models[i] = True
            optr.update(self.history['ELBO'][i], increment=False)

    patience = patience*np.ones(self.n_models)

    logging.debug("> Performing model fit...")
    if self.threads > 1:
        logging.debug(f"> Using up to {self.threads} threads.")

    # If the model is fit over a single chromosome, append this information to the
    # tqdm progress bar:
    if len(self.shapes) == 1:
        chrom, shape = list(self.shapes.items())[0]
        desc = f"Chromosome {chrom} ({shape[0]} variants)"
    else:
        desc = None

    # Progress bar:
    pbar = tqdm(range(start_idx, start_idx + max_iter),
                disable=not self.verbose,
                desc=desc)

    for i in pbar:

        if all([optr.stop_iteration for optr in self.optim_results]):

            # If converged, update the progress bar before exiting:
            pbar.set_postfix({
                'Best ELBO': f"{self.history['ELBO'][-1][self.models_to_keep].max():.4f}",
                'Models converged': f"{self.n_models - np.sum(self.active_models)}/{self.n_models}"
            })
            pbar.n = i - 1
            pbar.total = i - 1
            pbar.refresh()
            pbar.close()

            pbar.close()
            break

        self.update_theta_history()

        self.e_step()
        self.m_step()

        self.history['ELBO'].append(self.elbo(sum_axis=0))
        h2 = self.get_heritability()

        if i > 1:

            # Get the current and previous ELBO values
            curr_elbo = self.history['ELBO'][-1]
            prev_elbo = self.history['ELBO'][-2]

            for m in np.where(self.active_models)[0]:

                if (i > min_iter) and np.isclose(prev_elbo[m], curr_elbo[m], atol=f_abs_tol, rtol=0.):
                    self.active_models[m] = False
                    self.optim_results[m].update(curr_elbo[m],
                                                 stop_iteration=True,
                                                 success=True,
                                                 message='Objective (ELBO) converged successfully.')
                elif (i > min_iter) and max([np.max(np.abs(diff[:, m]))
                                           for diff in self.eta_diff.values()]) < x_abs_tol:
                    self.active_models[m] = False
                    self.optim_results[m].update(curr_elbo[m],
                                                 stop_iteration=True,
                                                 success=True,
                                                 message='Variational parameters converged successfully.')

                # Check to see if the objective drops due to numerical instabilities:
                elif curr_elbo[m] < prev_elbo[m] and not np.isclose(curr_elbo[m],
                                                                    prev_elbo[m],
                                                                    atol=0.,
                                                                    rtol=drop_r_tol):
                    patience[m] -= 1

                    if patience[m] == 0:
                        self.active_models[m] = False
                        self.optim_results[m].update(curr_elbo[m],
                                                     stop_iteration=True,
                                                     success=False,
                                                     message='Optimization is halted '
                                                             'due to numerical instabilities.')
                    else:
                        self.optim_results[m].update(curr_elbo)

                # Check if the model parameters behave in unexpected/pathological ways:
                elif np.isnan(curr_elbo[m]):
                    self.active_models[m] = False
                    self.optim_results[m].update(curr_elbo[m],
                                                 stop_iteration=True,
                                                 success=False,
                                                 message='The objective (ELBO) is NaN.')
                elif self.sigma_epsilon[m] <= 0.:
                    self.active_models[m] = False
                    self.optim_results[m].update(curr_elbo[m],
                                                 stop_iteration=True,
                                                 success=False,
                                                 message='Optimization is halted (sigma_epsilon <= 0).')
                elif h2[m] >= 1.:
                    self.active_models[m] = False
                    self.optim_results[m].update(curr_elbo[m],
                                                 stop_iteration=True,
                                                 success=False,
                                                 message='Optimization is halted (h2 >= 1).')
                else:
                    self.optim_results[m].update(curr_elbo)

            # -----------------------------------------------------------------------

        if self.models_to_keep.sum() > 0:
            pbar.set_postfix({
                'Best ELBO': f"{self.history['ELBO'][-1][self.models_to_keep].max():.4f}",
                'Models converged': f"{self.n_models - np.sum(self.active_models)}/{self.n_models}"
            })

    # Update posterior moments:
    self.update_posterior_moments()

    # Inspect the optimization results:
    for m, optr in enumerate(self.optim_results):
        if not optr.stop_iteration:
            self.active_models[m] = False
            optr.update(self.history['ELBO'][-1][m],
                        stop_iteration=True,
                        success=False,
                        message="Maximum iterations reached without convergence.\n"
                                "You may need to run the model for more iterations.")

    # Inform the user about potential issues:
    if int(self.verbose) > 1:

        if self.models_to_keep.sum() > 0:
            logging.info(f"> Optimization is complete for all {self.n_models} models.")
            logging.info(f"> {np.sum(self.models_to_keep)} model(s) converged successfully.")
        else:
            logging.error("> All models failed to converge. Please check the optimization results.")

    self.validation_result = self.grid_table.copy()
    self.validation_result['ELBO'] = [optr.fun for optr in self.optim_results]
    self.validation_result['Converged'] = self.models_to_keep
    self.validation_result['Optimization_message'] = [optr.message for optr in self.optim_results]

    return self

init_optim_meta()

Initialize the various quantities/objects to keep track of the optimization process. This method initializes the "history" object (which keeps track of the objective + other hyperparameters requested by the user), in addition to the OptimizeResult objects.

Source code in viprs/model/gridsearch/VIPRSGrid.py
def init_optim_meta(self):
    """
    Initialize the various quantities/objects to keep track of the optimization process.
     This method initializes the "history" object (which keeps track of the objective + other
     hyperparameters requested by the user), in addition to the OptimizeResult objects.
    """
    super().init_optim_meta()

    # Reset the OptimizeResult objects:
    for optr in self.optim_results:
        optr.reset()

initialize_theta(theta_0=None)

Initialize the global hyperparameters of the model.

Parameters:

theta_0: A dictionary of initial values for the hyperparameters theta (default: None).
Source code in viprs/model/gridsearch/VIPRSGrid.py
def initialize_theta(self, theta_0=None):
    """
    Initialize the global hyperparameters of the model.
    :param theta_0: A dictionary of initial values for the hyperparameters theta
    """

    self.active_models = np.array([True for _ in range(self.n_models)])

    super().initialize_theta(theta_0=theta_0)

    try:
        if self.pi.shape != (self.n_models, ):
            self.pi *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
    except AttributeError:
        self.pi *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

    try:
        if self.tau_beta.shape != (self.n_models, ):
            self.tau_beta *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
    except AttributeError:
        self.tau_beta *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

    try:
        if self.sigma_epsilon.shape != (self.n_models, ):
            self.sigma_epsilon *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
    except AttributeError:
        self.sigma_epsilon *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

    try:
        if self._sigma_g.shape != (self.n_models, ):
            self._sigma_g *= np.ones(shape=(self.n_models, ), dtype=self.float_precision)
    except AttributeError:
        self._sigma_g *= np.ones(shape=(self.n_models,), dtype=self.float_precision)

to_theta_table()

Returns: A pandas DataFrame containing information about the hyperparameters of the model.
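
The table is in long format, with one row per (model, hyperparameter) pair; a small sketch of reshaping it for side-by-side comparison:

theta_df = model.to_theta_table()
# One row per model, one column per hyperparameter:
wide = theta_df.pivot(index='Model', columns='Parameter', values='Value')
print(wide[['Heritability', 'Proportion_causal']])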

Source code in viprs/model/gridsearch/VIPRSGrid.py
def to_theta_table(self):
    """
    :return: A `pandas` DataFrame containing information about the hyperparameters of the model.
    """

    if self.n_models == 1:
        return super(VIPRSGrid, self).to_theta_table()

    sig_e = self.sigma_epsilon
    h2 = self.get_heritability()
    pi = self.get_proportion_causal()

    if isinstance(self.tau_beta, dict):
        taus = dict_mean(self.tau_beta, axis=0)
    else:
        taus = self.tau_beta

    theta_table = []

    for m in range(self.n_models):

        theta_table += [
            {'Model': m, 'Parameter': 'Residual_variance', 'Value': sig_e[m]},
            {'Model': m, 'Parameter': 'Heritability', 'Value': h2[m]},
            {'Model': m, 'Parameter': 'Proportion_causal', 'Value': pi[m]},
            {'Model': m, 'Parameter': 'sigma_beta', 'Value': taus[m]}
        ]

    return pd.DataFrame(theta_table)

to_validation_table()

Returns: The validation table summarizing the performance of each model.

Raises: ValueError if the validation result is not set.

Source code in viprs/model/gridsearch/VIPRSGrid.py
def to_validation_table(self):
    """
    :return: The validation table summarizing the performance of each model.
    :raises ValueError: if the validation result is not set.
    """
    if self.validation_result is None:
        raise ValueError("Validation result is not set!")
    elif len(self.validation_result) < 1:
        raise ValueError("Validation result is not set!")

    return pd.DataFrame(self.validation_result)

write_validation_result(v_filename, sep='\t')

After performing hyperparameter search, write a table that records the value of the objective for each combination of hyperparameters.

Parameters:

v_filename: The filename for the validation table (required).
sep: The separator for the validation table (default: '\t').
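
A brief usage sketch (the output path is illustrative):

import pandas as pd

model.write_validation_result("viprs_grid_validation.tsv")

# The table can be read back for downstream model comparison:
v_df = pd.read_csv("viprs_grid_validation.tsv", sep="\t")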
Source code in viprs/model/gridsearch/VIPRSGrid.py
def write_validation_result(self, v_filename, sep="\t"):
    """
    After performing hyperparameter search, write a table
    that records the value of the objective for each combination
    of hyperparameters.
    :param v_filename: The filename for the validation table.
    :param sep: The separator for the validation table
    """

    v_df = self.to_validation_table()
    v_df.to_csv(v_filename, index=False, sep=sep)