Compute utils

`combine_coefficient_tables(coef_tables, coef_col='BETA')` ¶

Combine a list of coefficient tables (output from a PRS model) into a single table that can be used for downstream tasks, such as scoring and evaluation. Note that this implementation assumes that the coefficient tables were generated for the same set of variants, from a grid-search or similar procedure.

Parameters:

Name	Type	Description	Default
`coef_tables`		A list of pandas dataframes containing variant information as well as inferred coefficients.	required
`coef_col`		The name of the column containing the coefficients.	`'BETA'`

Returns:

Type	Description
	A single pandas dataframe with the combined coefficients. The new coefficient columns will be labelled as BETA_0, BETA_1, etc.

Source code in viprs/utils/compute_utils.py

def combine_coefficient_tables(coef_tables, coef_col='BETA'):
    """
    Merge several coefficient tables (e.g. the outputs of a PRS model fitted
    over a grid of hyperparameters) into one table that can be used for
    downstream tasks such as scoring and evaluation.

    The tables are assumed to describe the same set of variants. Only the
    presence of the coefficient column and the row counts are verified here;
    the columns are placed side by side with `pd.concat(..., axis=1)`, which
    aligns rows on the dataframe index.

    :param coef_tables: A list of pandas dataframes containing variant information
    as well as inferred coefficients.
    :param coef_col: The name of the column containing the coefficients.
    :return: A single pandas dataframe with the combined coefficients. When only one
    table is given, that table is returned as-is (same object, column not renamed).
    Otherwise, all columns of the first table are kept and the coefficient columns
    are labelled `{coef_col}_0`, `{coef_col}_1`, etc. (e.g. BETA_0, BETA_1).
    """

    # Sanity checks:
    assert all([coef_col in tab.columns for tab in coef_tables]), "All tables must contain the coefficient column."
    assert all([len(tab) == len(coef_tables[0]) for tab in coef_tables]), "All tables must have the same number of rows."

    # Nothing to combine: hand back the single table untouched.
    if len(coef_tables) == 1:
        return coef_tables[0]

    # The first table contributes the variant information plus coefficient 0.
    # `rename` without `inplace` returns a new dataframe, so the input is not modified.
    frames = [coef_tables[0].rename(columns={coef_col: f'{coef_col}_0'})]

    # Every other table contributes only its (relabelled) coefficient column:
    for idx in range(1, len(coef_tables)):
        coef_only = coef_tables[idx][[coef_col]]
        frames.append(coef_only.rename(columns={coef_col: f'{coef_col}_{idx}'}))

    return pd.concat(frames, axis=1)

`dict_concat(d, axis=0)` ¶

Concatenate the values of a dictionary into a single vector

Parameters:

Name	Type	Description	Default
`d`		A dictionary where values are numeric scalars or vectors	required
`axis`		Concatenate along given axis.	`0`

Source code in viprs/utils/compute_utils.py

def dict_concat(d, axis=0):
    """
    Concatenate the values of a dictionary into a single array, visiting
    the entries in sorted key order.

    :param d: A dictionary whose values are arrays that `np.concatenate` can join
    :param axis: Concatenate along given axis.
    :return: The concatenated array. If the dictionary holds exactly one entry,
    its value is returned directly (no copy, no concatenation).
    """
    if len(d) != 1:
        ordered_values = [d[key] for key in sorted(d)]
        return np.concatenate(ordered_values, axis=axis)

    # Single entry: return the stored object itself.
    (only_value,) = d.values()
    return only_value

`dict_dot(d1, d2)` ¶

Perform dot product on the elements of d1 and d2

Parameters:

Name	Type	Description	Default
`d1`		A dictionary where values are numeric scalars or vectors	required
`d2`		A dictionary where values are numeric scalars or vectors	required

Source code in viprs/utils/compute_utils.py

def dict_dot(d1, d2):
    """
    Compute the dot product between matching entries of d1 and d2 and
    add the per-key results together.

    :param d1: A dictionary where values are numeric scalars or vectors.
    Its keys determine which entries are used.
    :param d2: A dictionary where values are numeric scalars or vectors.
    Must contain every key of d1.
    :return: The sum over keys of `np.dot(d1[key], d2[key])`.
    """
    per_key_products = []
    for key in d1:
        per_key_products.append(np.dot(d1[key], d2[key]))

    return np.sum(per_key_products)

`dict_elementwise_dot(d1, d2)` ¶

Apply element-wise product between the values of two dictionaries

Parameters:

Name	Type	Description	Default
`d1`		A dictionary where values are numeric scalars or vectors	required
`d2`		A dictionary where values are numeric scalars or vectors	required

Source code in viprs/utils/compute_utils.py

def dict_elementwise_dot(d1, d2):
    """
    Multiply the values of two dictionaries key by key using the `*` operator
    (i.e. an element-wise product for numpy arrays).

    :param d1: A dictionary where values are numeric scalars or vectors.
    Its keys determine the keys of the result.
    :param d2: A dictionary where values are numeric scalars or vectors.
    Must contain every key of d1.
    :return: A new dictionary mapping each key of d1 to `d1[key] * d2[key]`.
    """
    products = {}
    for key, left in d1.items():
        products[key] = left * d2[key]

    return products

`dict_elementwise_transform(d, transform)` ¶

Apply a transformation to values of a dictionary

Parameters:

Name	Type	Description	Default
`d`		A dictionary where values are numeric scalars or vectors	required
`transform`		A function to apply to each element of the dictionary's values	required

Source code in viprs/utils/compute_utils.py

def dict_elementwise_transform(d, transform):
    """
    Apply a transformation to every element of every value in a dictionary.

    The function is wrapped with `np.vectorize`, so it is called on the
    individual elements of each value rather than on the value as a whole.

    :param d: A dictionary where values are numeric scalars or vectors
    :param transform: A function to apply to each element
    :return: A new dictionary with the same keys and the transformed values.
    """
    transformed = {}
    for key, values in d.items():
        elementwise = np.vectorize(transform)
        transformed[key] = elementwise(values)

    return transformed

`dict_max(d, axis=None)` ¶

Estimate the maximum of the values of a dictionary

Parameters:

Name	Type	Description	Default
`d`		A dictionary where values are numeric scalars or vectors	required
`axis`		Perform aggregation along given axis.	`None`

Source code in viprs/utils/compute_utils.py

def dict_max(d, axis=None):
    """
    Compute the maximum over the values of a dictionary.

    The maximum of each value is taken first (along `axis`), and the
    per-entry maxima are then stacked and reduced again along `axis`.

    :param d: A dictionary where values are numeric scalars or vectors
    :param axis: Perform aggregation along given axis.
    :return: The maximum across all entries.
    """
    per_entry_max = []
    for values in d.values():
        per_entry_max.append(np.max(values, axis=axis))

    stacked = np.array(per_entry_max)
    return np.max(stacked, axis=axis)

`dict_mean(d, axis=None)` ¶

Estimate the mean of the values of a dictionary

Parameters:

Name	Type	Description	Default
`d`		A dictionary where values are numeric scalars or vectors	required
`axis`		Perform aggregation along given axis.	`None`

Source code in viprs/utils/compute_utils.py

def dict_mean(d, axis=None):
    """
    Compute the mean over the values of a dictionary.

    The mean of each value is taken first (along `axis`), and the per-entry
    means are then stacked and averaged again along `axis`. Each entry thus
    contributes equally to the result regardless of how many elements it
    holds (i.e. this is an unweighted mean of the per-entry means).

    :param d: A dictionary where values are numeric scalars or vectors
    :param axis: Perform aggregation along given axis.
    :return: The mean of the per-entry means.
    """
    per_entry_mean = []
    for values in d.values():
        per_entry_mean.append(np.mean(values, axis=axis))

    stacked = np.array(per_entry_mean)
    return np.mean(stacked, axis=axis)

`dict_repeat(value, shapes)` ¶

Given a value, create a dictionary where the value is repeated according to the shapes parameter

Parameters:

Name	Type	Description	Default
`shapes`		A dictionary of shapes. Key is arbitrary, value is the shape (an integer or tuple of integers) passed to np.ones	required
`value`		The value to repeat	required

Source code in viprs/utils/compute_utils.py

def dict_repeat(value, shapes):
    """
    Build a dictionary of arrays filled with a given value, one array per
    entry of `shapes`.

    :param value: The value to repeat
    :param shapes: A dictionary of shapes. Key is arbitrary, value is the shape
    passed to `np.ones` for that entry.
    :return: A dictionary with the keys of `shapes`, where each value is
    `value * np.ones(shape)`.
    """
    repeated = {}
    for key, shape in shapes.items():
        repeated[key] = value * np.ones(shape)

    return repeated

`dict_set(d, value)` ¶

Parameters:

Name	Type	Description	Default
`d`		A dictionary where values are numeric vectors	required
`value`		A value to set for all vectors	required

Source code in viprs/utils/compute_utils.py

def dict_set(d, value):
    """
    Overwrite, in place, the contents of every vector in a dictionary with
    the given value.

    :param d: A dictionary where values are numeric vectors
    :param value: A value to set for all vectors
    :return: The same dictionary `d`, whose vectors have been modified in place.
    """
    for vector in d.values():
        # Slice assignment writes into the existing array rather than rebinding the key.
        vector[:] = value

    return d

`dict_sum(d, axis=None, transform=None)` ¶

Estimate the sum of the values of a dictionary

Parameters:

Name	Description	Default
`d`	A dictionary where values are numeric scalars or vectors	required
`axis`	Perform aggregation along given axis.	`None`
`transform`	Transformation to apply before summing.	`None`

Source code in viprs/utils/compute_utils.py

def dict_sum(d, axis=None, transform=None):
    """
    Compute the sum over the values of a dictionary.

    Each value is (optionally transformed and then) summed along `axis`;
    the per-entry sums are then stacked and summed again along `axis`.

    :param d: A dictionary where values are numeric scalars or vectors
    :param axis: Perform aggregation along given axis.
    :param transform: Transformation to apply before summing. It is called
    on each dictionary value as a whole. If None, values are summed as they are.
    :return: The sum across all entries.
    """
    per_entry_sum = []
    for values in d.values():
        if transform is not None:
            values = transform(values)
        per_entry_sum.append(np.sum(values, axis=axis))

    return np.sum(np.array(per_entry_sum), axis=axis)

`expand_column_names(c_name, shape, sep='_')` ¶

Given a desired column name c_name and a matrix shape that we'd like to apply the column name to, return a list of column names for every column in the matrix. The column names will be in the form of c_name followed by an index, separated by sep.

For example, if the column name is BETA, the shape is (100, 3) and the separator is _, we return a list with: [BETA_0, BETA_1, BETA_2]

If the matrix in question is a vector, we just return the column name without any indices appended to it.

Parameters:

Name	Description	Default
`c_name`	A string object	required
`shape`	The shape of a numpy matrix or vector	required
`sep`	The separator	`'_'`

Returns:

Type	Description
	A list of column names

Source code in viprs/utils/compute_utils.py

def expand_column_names(c_name, shape, sep='_'):
    """
    Generate one column name per column of a matrix with the given `shape`,
    based on a common name `c_name`. Each generated name is `c_name`
    followed by the column index, joined by `sep`.

    For example, with the column name `BETA`, the shape (100, 3) and the
    separator `_`, the result is:
    [`BETA_0`, `BETA_1`, `BETA_2`]

    If the shape describes a vector (fewer than two dimensions) or a matrix
    with a single column, the column name is returned on its own, without
    any index appended to it.

    :param c_name: A string object
    :param shape: The shape of a numpy matrix or vector
    :param sep: The separator

    :return: A list of column names
    """

    is_single_column = len(shape) < 2 or shape[1] == 1
    if is_single_column:
        return [c_name]

    names = []
    for col_idx in range(shape[1]):
        names.append(f'{c_name}{sep}{col_idx}')

    return names

`fits_in_memory(alloc_size, max_prop=0.9)` ¶

Check whether there's enough memory resources to load an object with the given allocation size (in MB).

Parameters:

Name	Type	Description	Default
`alloc_size`		The allocation size	required
`max_prop`		The maximum proportion of available memory allowed for the object	`0.9`

Source code in viprs/utils/compute_utils.py

def fits_in_memory(alloc_size, max_prop=.9):
    """
    Check whether there's enough memory resources to load an object
    with the given allocation size (in MB).

    :param alloc_size: The allocation size (in MB)
    :param max_prop: The maximum proportion of available memory allowed for the object
    :return: True if `alloc_size` does not exceed `max_prop` times the currently
    available memory, False otherwise.
    """

    # psutil reports the available memory in bytes; convert to MB to match `alloc_size`.
    avail_mem = psutil.virtual_memory().available / (1024.0 ** 2)

    # Compare against the scaled budget instead of dividing by `avail_mem`, so that
    # a report of zero available memory does not raise a ZeroDivisionError.
    return not (alloc_size > max_prop * avail_mem)

Compute utils

combine_coefficient_tables(coef_tables, coef_col='BETA') ¶

dict_concat(d, axis=0) ¶

dict_dot(d1, d2) ¶

dict_elementwise_dot(d1, d2) ¶

dict_elementwise_transform(d, transform) ¶

dict_max(d, axis=None) ¶

dict_mean(d, axis=None) ¶

dict_repeat(value, shapes) ¶

dict_set(d, value) ¶

dict_sum(d, axis=None, transform=None) ¶

expand_column_names(c_name, shape, sep='_') ¶

fits_in_memory(alloc_size, max_prop=0.9) ¶

`combine_coefficient_tables(coef_tables, coef_col='BETA')` ¶

`dict_concat(d, axis=0)` ¶

`dict_dot(d1, d2)` ¶

`dict_elementwise_dot(d1, d2)` ¶

`dict_elementwise_transform(d, transform)` ¶

`dict_max(d, axis=None)` ¶

`dict_mean(d, axis=None)` ¶

`dict_repeat(value, shapes)` ¶

`dict_set(d, value)` ¶

`dict_sum(d, axis=None, transform=None)` ¶

`expand_column_names(c_name, shape, sep='_')` ¶

`fits_in_memory(alloc_size, max_prop=0.9)` ¶