Skip to content

Sumstats parsers

COJOSSParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics files generated by the COJO software.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class COJOSSParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics files generated by the `COJO` software.

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the COJO summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """
        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the COJO defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # COJO column names. On a key clash these defaults overwrite the
        # user-supplied entry.
        self.col_name_converter.update(
            {
                'freq': 'MAF',
                'b': 'BETA',
                'se': 'SE',
                'p': 'PVAL'
            }
        )

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the COJO summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the COJO summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """
    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the COJO defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # COJO column names. On a key clash these defaults overwrite the
    # user-supplied entry.
    self.col_name_converter.update(
        {
            'freq': 'MAF',
            'b': 'BETA',
            'se': 'SE',
            'p': 'PVAL'
        }
    )

FastGWASSParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics files generated by the FastGWA software.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class FastGWASSParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics files generated by the `FastGWA` software.

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the `FastGWA` summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """
        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the FastGWA defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # FastGWA column names. On a key clash these defaults overwrite the
        # user-supplied entry.
        self.col_name_converter.update(
            {
                'AF1': 'MAF',
                'P': 'PVAL'
            }
        )

__init__(col_name_converter=None, **read_csv_kwargs)

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the `FastGWA` summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """
    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the FastGWA defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # FastGWA column names. On a key clash these defaults overwrite the
    # user-supplied entry.
    self.col_name_converter.update(
        {
            'AF1': 'MAF',
            'P': 'PVAL'
        }
    )

Plink1SSParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics files generated by plink1.9.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class Plink1SSParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics files generated by `plink1.9`.

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the `plink1.9` summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """

        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the plink1.9 defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # plink1.9 column names. On a key clash these defaults overwrite the
        # user-supplied entry.
        self.col_name_converter.update(
            {
                'P': 'PVAL',
                'NMISS': 'N',
                'STAT': 'Z',
                'BP': 'POS'
            }
        )

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the plink1.9 summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the `plink1.9` summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """

    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the plink1.9 defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # plink1.9 column names. On a key clash these defaults overwrite the
    # user-supplied entry.
    self.col_name_converter.update(
        {
            'P': 'PVAL',
            'NMISS': 'N',
            'STAT': 'Z',
            'BP': 'POS'
        }
    )

Plink2SSParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics files generated by plink2.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class Plink2SSParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics files generated by `plink2`.

    !!! seealso "See Also"
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the `plink2` summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """

        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the plink2 defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # plink2 column names. On a key clash these defaults overwrite the
        # user-supplied entry. Both 'T_STAT' and 'Z_STAT' are renamed to 'Z'.
        self.col_name_converter.update(
            {
                '#CHROM': 'CHR',
                'ID': 'SNP',
                'P': 'PVAL',
                'OBS_CT': 'N',
                'A1_FREQ': 'MAF',
                'T_STAT': 'Z',
                'Z_STAT': 'Z'
            }
        )

    def parse(self, file_name, drop_na=True):
        """
        Parse a summary statistics file and, when the table has no `A2`
        column, try to derive it from the `A1`, `REF` and `ALT1`/`ALT` columns.

        :param file_name: The path to the summary statistics file.
        :param drop_na: Drop any entries with missing values.

        :return: A pandas DataFrame containing the parsed summary statistics.
        """

        df = super().parse(file_name, drop_na=drop_na)

        if 'A2' not in df.columns:
            try:
                # A2 is whichever of REF / ALT is not A1: where A1 equals the
                # ALT allele, A2 is REF; otherwise A2 is the ALT allele.
                if 'ALT1' in df.columns:
                    df['A2'] = np.where(df['A1'] == df['ALT1'], df['REF'], df['ALT1'])
                elif 'ALT' in df.columns:
                    df['A2'] = np.where(df['A1'] == df['ALT'], df['REF'], df['ALT'])
                else:
                    warnings.warn("The reference allele A2 could not be inferred "
                                  "from the summary statistics file!")
            except KeyError:
                # An ALT column exists but 'A1' or 'REF' does not.
                warnings.warn("The reference allele A2 could not be inferred "
                              "from the summary statistics file! Some of the columns needed to infer "
                              "the A2 allele are missing or coded differently than what we expect.")

        return df

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the plink2 summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the `plink2` summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """

    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the plink2 defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # plink2 column names. On a key clash these defaults overwrite the
    # user-supplied entry. Both 'T_STAT' and 'Z_STAT' are renamed to 'Z'.
    self.col_name_converter.update(
        {
            '#CHROM': 'CHR',
            'ID': 'SNP',
            'P': 'PVAL',
            'OBS_CT': 'N',
            'A1_FREQ': 'MAF',
            'T_STAT': 'Z',
            'Z_STAT': 'Z'
        }
    )

parse(file_name, drop_na=True)

Parse a summary statistics file.

Parameters:

Name Type Description Default
file_name

The path to the summary statistics file.

required
drop_na

Drop any entries with missing values.

True

Returns:

Type Description

A pandas DataFrame containing the parsed summary statistics.

Source code in magenpy/parsers/sumstats_parsers.py
def parse(self, file_name, drop_na=True):
    """
    Read a summary statistics file and, if the resulting table has no `A2`
    column, attempt to fill it in from the `A1`, `REF` and `ALT1`/`ALT` columns.

    :param file_name: The path to the summary statistics file.
    :param drop_na: Drop any entries with missing values.

    :return: A pandas DataFrame containing the parsed summary statistics.
    """

    sumstats = super().parse(file_name, drop_na=drop_na)

    # Nothing to infer when the file already provides A2.
    if 'A2' in sumstats.columns:
        return sumstats

    # 'ALT1' is preferred over 'ALT' when both are available.
    alt_col = next((name for name in ('ALT1', 'ALT') if name in sumstats.columns), None)

    if alt_col is None:
        warnings.warn("The reference allele A2 could not be inferred "
                      "from the summary statistics file!")
        return sumstats

    try:
        # Where A1 is the ALT allele, A2 is REF; otherwise A2 is the ALT allele.
        a1_is_alt = sumstats['A1'] == sumstats[alt_col]
        sumstats['A2'] = np.where(a1_is_alt, sumstats['REF'], sumstats[alt_col])
    except KeyError:
        warnings.warn("The reference allele A2 could not be inferred "
                      "from the summary statistics file! Some of the columns needed to infer "
                      "the A2 allele are missing or coded differently than what we expect.")

    return sumstats

SSFParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics that are formatted according to the standardized summary statistics format adopted by the GWAS Catalog. This format is sometimes denoted as GWAS-SSF.

Reference and details: https://github.com/EBISPOT/gwas-summary-statistics-standard

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class SSFParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics that are formatted according
     to the standardized summary statistics format adopted by the GWAS Catalog. This format is
     sometimes denoted as `GWAS-SSF`.

    Reference and details:
    https://github.com/EBISPOT/gwas-summary-statistics-standard

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the standardized summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """

        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the GWAS-SSF defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # GWAS-SSF column names. On a key clash these defaults overwrite the
        # user-supplied entry.
        self.col_name_converter.update(
            {
                'chromosome': 'CHR',
                'base_pair_location': 'POS',
                'rsid': 'SNP',
                'effect_allele': 'A1',
                'other_allele': 'A2',
                'beta': 'BETA',
                'standard_error': 'SE',
                'effect_allele_frequency': 'MAF',
                'p_value': 'PVAL',
                'n': 'N'
            }
        )

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the standardized summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the standardized summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """

    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the GWAS-SSF defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # GWAS-SSF column names. On a key clash these defaults overwrite the
    # user-supplied entry.
    self.col_name_converter.update(
        {
            'chromosome': 'CHR',
            'base_pair_location': 'POS',
            'rsid': 'SNP',
            'effect_allele': 'A1',
            'other_allele': 'A2',
            'beta': 'BETA',
            'standard_error': 'SE',
            'effect_allele_frequency': 'MAF',
            'p_value': 'PVAL',
            'n': 'N'
        }
    )

SaigeSSParser

Bases: SumstatsParser

A specialized class for parsing GWAS summary statistics files generated by the SAIGE software. Reference and details: https://saigegit.github.io/SAIGE-doc/docs/single_step2.html

TODO: Ensure that the column names are correct across different trait types and the inference of the sample size is correct.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class SaigeSSParser(SumstatsParser):
    """
    A specialized class for parsing GWAS summary statistics files generated by the `SAIGE` software.
    Reference and details:
    https://saigegit.github.io/SAIGE-doc/docs/single_step2.html

    TODO: Ensure that the column names are correct across different trait types
    and the inference of the sample size is correct.

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the `SAIGE` summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
        is copied, so the caller's object is left unmodified.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """
        super().__init__(col_name_converter, **read_csv_kwargs)

        # Merge into a copy: updating the stored dictionary in place would
        # leak the SAIGE defaults into the dictionary owned by the caller.
        self.col_name_converter = dict(self.col_name_converter or {})

        # NOTE: SAIGE considers Allele2 to be the effect allele, so
        # we switch their designation here. On a key clash these defaults
        # overwrite the user-supplied entry.
        self.col_name_converter.update(
            {
                'MarkerID': 'SNP',
                'Allele1': 'A2',
                'Allele2': 'A1',
                'AF_Allele2': 'MAF',
                'AC_Allele2': 'MAC',
                'Tstat': 'Z',
                'p.value': 'PVAL',
            }
        )

    def parse(self, file_name, drop_na=True):
        """
        Parse the summary statistics file and add a sample size column `N`
        computed from the `MAC` and `MAF` columns.

        :param file_name: The path to the summary statistics file.
        :param drop_na: Drop any entries with missing values.

        :return: A pandas DataFrame containing the parsed summary statistics.
        :raises KeyError: If the parsed table has no `MAC` or `MAF` column.
        """

        df = super().parse(file_name, drop_na=drop_na)

        # Infer the sample size N as MAC / (2 * MAF); any existing 'N'
        # column is overwritten.
        df['N'] = df['MAC'] / (2.*df['MAF'])

        return df

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the SAIGE summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the `SAIGE` summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). A dictionary passed here
    is copied, so the caller's object is left unmodified.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """
    super().__init__(col_name_converter, **read_csv_kwargs)

    # Merge into a copy: updating the stored dictionary in place would
    # leak the SAIGE defaults into the dictionary owned by the caller.
    self.col_name_converter = dict(self.col_name_converter or {})

    # NOTE: SAIGE considers Allele2 to be the effect allele, so
    # we switch their designation here. On a key clash these defaults
    # overwrite the user-supplied entry.
    self.col_name_converter.update(
        {
            'MarkerID': 'SNP',
            'Allele1': 'A2',
            'Allele2': 'A1',
            'AF_Allele2': 'MAF',
            'AC_Allele2': 'MAC',
            'Tstat': 'Z',
            'p.value': 'PVAL',
        }
    )

parse(file_name, drop_na=True)

Parse the summary statistics file.

Parameters:

Name Type Description Default
file_name

The path to the summary statistics file.

required
drop_na

Drop any entries with missing values.

True

Returns:

Type Description

A pandas DataFrame containing the parsed summary statistics.

Source code in magenpy/parsers/sumstats_parsers.py
def parse(self, file_name, drop_na=True):
    """
    Read the summary statistics file and attach a sample size column `N`
    derived from the `MAC` and `MAF` columns.

    :param file_name: The path to the summary statistics file.
    :param drop_na: Drop any entries with missing values.

    :return: A pandas DataFrame containing the parsed summary statistics.
    """

    sumstats = super().parse(file_name, drop_na=drop_na)

    allele_count = sumstats['MAC']
    allele_freq = sumstats['MAF']

    # Sample size is inferred as MAC / (2 * MAF).
    sumstats['N'] = allele_count / (2. * allele_freq)

    return sumstats

SumstatsParser

Bases: object

A wrapper class for parsing summary statistics files that are written by statistical genetics software for Genome-wide Association testing. A common challenge is the fact that different software tools output summary statistics in different formats and with different column names. Thus, this class provides a common interface for parsing summary statistics files from different software tools and aims to make this process as seamless as possible.

The class is designed to be extensible, so that users can easily add new parsers for different software tools.

Attributes:

Name Type Description
col_name_converter

A dictionary mapping column names in the original table to magenpy's column names.

read_csv_kwargs

Keyword arguments to pass to pandas' read_csv.

Source code in magenpy/parsers/sumstats_parsers.py
class SumstatsParser(object):
    """
    A wrapper class for parsing summary statistics files that are written by statistical genetics software
    for Genome-wide Association testing. A common challenge is the fact that different software tools
    output summary statistics in different formats and with different column names. Thus, this class
    provides a common interface for parsing summary statistics files from different software tools
    and aims to make this process as seamless as possible.

    The class is designed to be extensible, so that users can easily add new parsers for different software tools.

    !!! seealso "See Also"
        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]

    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.

    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        Initialize the summary statistics parser.

        :param col_name_converter: A dictionary/string mapping column names
        in the original table to magenpy's column names for the various
        summary statistics. If a string, it should be a comma-separated list of
        key-value pairs (e.g. 'rsid=SNP,pos=POS'). Empty entries (such as the
        one produced by a trailing comma) are ignored, and whitespace around
        keys and values is stripped.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv

        :raises ValueError: If a non-empty entry of the string is not of
        the form 'original=new'.
        """

        if isinstance(col_name_converter, str):
            parsed_converter = {}

            for entry in col_name_converter.split(','):
                entry = entry.strip()

                # Skip empty entries BEFORE splitting them; an empty entry
                # has no '=' and cannot be unpacked into a key/value pair.
                if not entry:
                    continue

                pair = entry.split('=')

                if len(pair) != 2:
                    raise ValueError(
                        f"Invalid column name mapping {entry!r}: "
                        "expected an entry of the form 'original=new'."
                    )

                parsed_converter[pair[0].strip()] = pair[1].strip()

            self.col_name_converter = parsed_converter
        else:
            self.col_name_converter = col_name_converter

        self.read_csv_kwargs = read_csv_kwargs

        # If the delimiter is not specified, assume whitespace by default:
        if 'sep' not in self.read_csv_kwargs and 'delimiter' not in self.read_csv_kwargs:
            self.read_csv_kwargs['sep'] = r'\s+'

    def parse(self, file_name, drop_na=True):
        """
        Parse a summary statistics file.

        The file is read with pandas' `read_csv`, rows with missing values are
        optionally dropped, the columns are renamed with `col_name_converter`
        and the `POS` column (if there is one) is cast to 32-bit integers.

        :param file_name: The path to the summary statistics file.
        :param drop_na: If True, drop any entries with missing values.

        :return: A pandas DataFrame containing the parsed summary statistics.
        """

        df = pd.read_csv(file_name, **self.read_csv_kwargs)

        if drop_na:
            df = df.dropna()

        if self.col_name_converter is not None:
            df.rename(columns=self.col_name_converter, inplace=True)

        try:
            df['POS'] = df['POS'].astype(np.int32)
        except KeyError:
            # The table has no position column: nothing to convert.
            pass

        return df

__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the summary statistics parser.

Parameters:

Name Type Description Default
col_name_converter

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

None
read_csv_kwargs

Keyword arguments to pass to pandas' read_csv

{}
Source code in magenpy/parsers/sumstats_parsers.py
def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    Initialize the summary statistics parser.

    :param col_name_converter: A dictionary/string mapping column names
    in the original table to magenpy's column names for the various
    summary statistics. If a string, it should be a comma-separated list of
    key-value pairs (e.g. 'rsid=SNP,pos=POS'). Empty entries (such as the
    one produced by a trailing comma) are ignored, and whitespace around
    keys and values is stripped.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv

    :raises ValueError: If a non-empty entry of the string is not of
    the form 'original=new'.
    """

    if isinstance(col_name_converter, str):
        parsed_converter = {}

        for entry in col_name_converter.split(','):
            entry = entry.strip()

            # Skip empty entries BEFORE splitting them; an empty entry
            # has no '=' and cannot be unpacked into a key/value pair.
            if not entry:
                continue

            pair = entry.split('=')

            if len(pair) != 2:
                raise ValueError(
                    f"Invalid column name mapping {entry!r}: "
                    "expected an entry of the form 'original=new'."
                )

            parsed_converter[pair[0].strip()] = pair[1].strip()

        self.col_name_converter = parsed_converter
    else:
        self.col_name_converter = col_name_converter

    self.read_csv_kwargs = read_csv_kwargs

    # If the delimiter is not specified, assume whitespace by default:
    if 'sep' not in self.read_csv_kwargs and 'delimiter' not in self.read_csv_kwargs:
        self.read_csv_kwargs['sep'] = r'\s+'

parse(file_name, drop_na=True)

Parse a summary statistics file.

Parameters:

Name Type Description Default
file_name

The path to the summary statistics file.

required
drop_na

If True, drop any entries with missing values.

True

Returns:

Type Description

A pandas DataFrame containing the parsed summary statistics.

Source code in magenpy/parsers/sumstats_parsers.py
def parse(self, file_name, drop_na=True):
    """
    Load a summary statistics file into a DataFrame.

    The file is read with pandas' `read_csv` using the stored keyword
    arguments; the columns are then renamed with `col_name_converter` (when
    one is set) and the `POS` column, if present, is cast to 32-bit integers.

    :param file_name: The path to the summary statistics file.
    :param drop_na: If True, drop any entries with missing values.

    :return: A pandas DataFrame containing the parsed summary statistics.
    """

    table = pd.read_csv(file_name, **self.read_csv_kwargs)
    table = table.dropna() if drop_na else table

    converter = self.col_name_converter
    if converter is not None:
        table = table.rename(columns=converter)

    try:
        positions = table['POS']
        table['POS'] = positions.astype(np.int32)
    except KeyError:
        # No position column in this table: leave it untouched.
        pass

    return table