
# Copyright (c) 2003-2024 by Mike Jarvis
#
# TreeCorr is free software: redistribution and use in source and binary forms,
# with or without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions, and the disclaimer given in the accompanying LICENSE
#    file.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions, and the disclaimer given in the documentation
#    and/or other materials provided with the distribution.

"""
.. module:: catalog
"""

import numpy as np
import coord
import weakref
import copy
import os

from . import _treecorr
from .reader import FitsReader, HdfReader, AsciiReader, PandasReader, ParquetReader
from .config import merge_config, setup_logger, get, get_from_list
from .util import parse_file_type, LRU_Cache, make_writer, make_reader, set_omp_threads
from .field import NField, KField, ZField, VField, GField, TField, QField

class Catalog(object):
    r"""A set of input data (positions and other quantities) to be correlated.

    A Catalog object keeps track of the relevant information for a number of objects to be
    correlated.  The objects each have some kind of position (for instance (x,y), (ra,dec),
    (x,y,z), etc.), and possibly some extra information such as weights (w), shear values
    (g1,g2), scalar values (k), or vector values (v1,v2).

    .. note::

        See `Shear Conventions` for some discussion of the conventions used in TreeCorr
        for the orientation of the shear values.

    The simplest way to build a Catalog is to pass in numpy arrays for each piece of
    information you want included.  For instance::

        >>> cat = treecorr.Catalog(x=x, y=y, k=k, w=w)

    Each of these input parameters should be a numpy array, where each corresponding element
    is the value for that object.  Of course, all the arrays should be the same size.

    In some cases, there are additional required parameters.  For instance, with RA and Dec
    positions, you need to declare what units the given input values use::

        >>> cat = treecorr.Catalog(ra=ra, dec=dec, g1=g1, g2=g2,
        ...                        ra_units='hour', dec_units='deg')

    For (ra,dec) positions, these units fields are required to specify the units of the
    angular values.  For (x,y) positions, the units are optional (and usually unnecessary).

    You can also initialize a Catalog by reading in columns from a file.  For instance::

        >>> cat = treecorr.Catalog('data.fits', ra_col='ALPHA2000', dec_col='DELTA2000',
        ...                        g1_col='E1', g2_col='E2', ra_units='deg', dec_units='deg')

    This reads the given columns from the input file.  The input file may be a FITS file,
    an HDF5 file, a Parquet file, or an ASCII file.  Normally the file type is determined
    according to the file's extension (e.g. '.fits' here), but it can also be set explicitly
    with ``file_type``.

    For FITS, HDF5, and Parquet files, the column names should be strings as shown above.
    For ASCII files, they may be strings if the input file has column names.  But you may
    also use integer values giving the index of which column to use.  We use a 1-based
    convention for these, so x_col=1 would mean to use the first column as the x value.
    (0 means don't read that column.)
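
    For instance, to read positions and a scalar field from the first three columns of an
    ASCII file with no column names (the file name here is just a placeholder)::

        >>> cat = treecorr.Catalog('data.dat', x_col=1, y_col=2, k_col=3)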

    Sometimes the columns in the input file aren't quite what you want.  Rather you need to
    do some simple calculation based on the input columns.  For instance, PSF rho statistics
    generally entail taking the difference of the model and data g1,g2 columns.  To deal with
    this, you can use e.g. ``g1_eval`` and ``g2_eval``, which use the Python eval function
    to evaluate a string.  The string can use the names of columns in the input file, so long
    as these columns are specified in the ``extra_cols`` parameter.  For instance::

        >>> cat = treecorr.Catalog('data.fits', ra_col='ALPHA2000', dec_col='DELTA2000',
        ...                        ra_units='deg', dec_units='deg',
        ...                        g1_eval='G1_MODEL - G1_DATA', g2_eval='G2_MODEL - G2_DATA',
        ...                        extra_cols=['G1_MODEL', 'G1_DATA', 'G2_MODEL', 'G2_DATA'])

    The eval strings are allowed to use numpy, math or coord functions if desired.  If you
    need additional modules, you can update the list ``treecorr.Catalog.eval_modules`` to
    add the module(s) you need.

    Finally, you may store all the various parameters in a configuration dict and just pass
    the dict as an argument after the file name::

        >>> config = { 'ra_col' : 'ALPHA2000',
        ...            'dec_col' : 'DELTA2000',
        ...            'g1_col' : 'E1',
        ...            'g2_col' : 'E2',
        ...            'ra_units' : 'deg',
        ...            'dec_units' : 'deg' }
        >>> cat = treecorr.Catalog(file_name, config)

    This can be useful for encapsulating all the TreeCorr options in a single place in your
    code, which might be used multiple times.  Notably, this syntax ignores any dict keys
    that are not relevant to the Catalog construction, so you can use the same config dict
    for the Catalog and your correlation objects, which can be convenient.

    See also `Configuration Parameters` for complete descriptions of all of the relevant
    configuration parameters, particularly the first section `Parameters about the input
    file(s)`.

    You may also override any configuration parameters or add additional parameters as
    kwargs after the config dict.  For instance, to flip the sign of the g1 values after
    reading from the input file, you could write::

        >>> cat1 = treecorr.Catalog(file_name, config, flip_g1=True)

    After construction, a Catalog object will have the following attributes:

    Attributes:
        x:      The x positions, if defined, as a numpy array (converted to radians if
                x_units was given). (None otherwise)
        y:      The y positions, if defined, as a numpy array (converted to radians if
                y_units was given). (None otherwise)
        z:      The z positions, if defined, as a numpy array. (None otherwise)
        ra:     The right ascension, if defined, as a numpy array (in radians).
                (None otherwise)
        dec:    The declination, if defined, as a numpy array (in radians). (None otherwise)
        r:      The distance, if defined, as a numpy array. (None otherwise)
        w:      The weights, as a numpy array. (All 1's if no weight column provided.)
        wpos:   The weights for position centroiding, as a numpy array, if given.
                (None otherwise, which means that implicitly wpos = w.)
        k:      The scalar field, kappa, if defined, as a numpy array. (None otherwise)
        z1:     The z1 component of a complex scalar, if defined, as a numpy array.
                (None otherwise)
        z2:     The z2 component of a complex scalar, if defined, as a numpy array.
                (None otherwise)
        v1:     The v1 component of a vector, if defined, as a numpy array. (None otherwise)
        v2:     The v2 component of a vector, if defined, as a numpy array. (None otherwise)
        g1:     The g1 component of a shear, if defined, as a numpy array. (None otherwise)
        g2:     The g2 component of a shear, if defined, as a numpy array. (None otherwise)
        t1:     The 1st component of a trefoil field, if defined, as a numpy array.
                (None otherwise)
        t2:     The 2nd component of a trefoil field, if defined, as a numpy array.
                (None otherwise)
        q1:     The 1st component of a quatrefoil field, if defined, as a numpy array.
                (None otherwise)
        q2:     The 2nd component of a quatrefoil field, if defined, as a numpy array.
                (None otherwise)
        patch:  The patch number of each object, if patches are being used. (None otherwise)
                If the entire catalog is a single patch, then ``patch`` may be an int.
        ntot:   The total number of objects (including those with zero weight if
                ``keep_zero_weight`` is set to True)
        nobj:   The number of objects with non-zero weight
        sumw:   The sum of the weights
        vark:   The variance of the scalar field (0 if k is not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 (\kappa-\langle \kappa \rangle)^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(\kappa)`.
                    It is only used for ``var_method='shot'``, where the noise estimate is
                    this value divided by the total weight per bin, so this is the right
                    quantity to use for that.

        varz:   The variance per component of the complex scalar field
                (0 if z1,z2 are not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 |z - \langle z \rangle|^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(z)`.
                    As for ``vark``, this is the right quantity to use for the ``'shot'``
                    noise estimate.

        varv:   The variance per component of the vector field (0 if v1,v2 are not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 |v - \langle v \rangle|^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(v)`.
                    As for ``vark``, this is the right quantity to use for the ``'shot'``
                    noise estimate.

        varg:   The variance per component of the shear field (aka shape noise)
                (0 if g1,g2 are not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 |g-\langle g \rangle|^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(g)`.
                    As for ``vark``, this is the right quantity to use for the ``'shot'``
                    noise estimate.

        vart:   The variance per component of the trefoil field (0 if t1,t2 are not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 |t-\langle t \rangle|^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(t)`.
                    As for ``vark``, this is the right quantity to use for the ``'shot'``
                    noise estimate.

        varq:   The variance per component of the quatrefoil field
                (0 if q1,q2 are not defined)

                .. note::

                    If there are weights, this is really
                    :math:`\sum(w^2 |q-\langle q \rangle|^2)/\sum(w)`,
                    which is more like :math:`\langle w \rangle \mathrm{Var}(q)`.
                    As for ``vark``, this is the right quantity to use for the ``'shot'``
                    noise estimate.

        name:   When constructed from a file, this will be the file_name.  It is only used
                as a reference name in logging output after construction, so if you
                construct it from data vectors directly, it will be ``''``.  You may assign
                to it if you want to give this catalog a specific name.

        coords: Which kind of coordinate system is defined for this catalog.
                The possibilities for this attribute are:

                - 'flat' = 2-dimensional flat coordinates.  Set when x,y are given.
                - 'spherical' = spherical coordinates.  Set when ra,dec are given.
                - '3d' = 3-dimensional coordinates.  Set when x,y,z or ra,dec,r are given.

        field:  If any of the `get?Field <Catalog.getNField>` methods have been called to
                construct a field from this catalog (either explicitly or implicitly via a
                `corr.process() <Corr2.process>` command), then this attribute will hold the
                most recent field to have been constructed.

                .. note::

                    It holds this field as a weakref, so if caching is turned off with
                    ``resize_cache(0)``, and the field has been garbage collected, then
                    this attribute will be None.

    Parameters:
        file_name (str):    The name of the catalog file to be read in. (default: None,
                            in which case the columns need to be entered directly with
                            ``x``, ``y``, etc.)
        config (dict):      A configuration dict which defines attributes about how to read
                            the file.  Any optional kwargs may be given here in the config
                            dict if desired.  Invalid keys in the config dict are ignored.
                            (default: None)

    Keyword Arguments:

        num (int):          Which number catalog are we reading.  e.g. for NG correlations
                            the catalog for the N has num=0, the one for G has num=1.  This
                            is only necessary if you are using a config dict where things
                            like ``x_col`` have multiple values. (default: 0)
        logger:             If desired, a Logger object for logging. (default: None, in
                            which case one will be built according to the config dict's
                            verbose level.)
        is_rand (bool):     If this is a random file, then setting is_rand to True will let
                            you skip k_col, g1_col, and g2_col if they were set for the
                            main catalog. (default: False)
        x (array):          The x values. (default: None; When providing values directly,
                            either x,y are required or ra,dec are required.)
        y (array):          The y values. (default: None; When providing values directly,
                            either x,y are required or ra,dec are required.)
        z (array):          The z values, if doing 3d positions. (default: None; invalid in
                            conjunction with ra, dec.)
        ra (array):         The RA values. (default: None; When providing values directly,
                            either x,y are required or ra,dec are required.)
        dec (array):        The Dec values. (default: None; When providing values directly,
                            either x,y are required or ra,dec are required.)
        r (array):          The r values (the distances of each source from Earth).
                            (default: None; invalid in conjunction with x, y.)
        w (array):          The weights to apply when computing the correlations.
                            (default: None)
        wpos (array):       The weights to use for position centroiding. (default: None,
                            which means to use the value weights, w, to weight the
                            positions as well.)
        flag (array):       An optional array of flags, indicating objects to skip.  Rows
                            with flag != 0 (or technically flag & ~ok_flag != 0) will be
                            given a weight of 0. (default: None)
        k (array):          The kappa values to use for scalar correlations. (This may
                            represent any scalar field.) (default: None)
        z1 (array):         The z1 values to use for complex scalar correlations.
                            (default: None)
        z2 (array):         The z2 values to use for complex scalar correlations.
                            (default: None)
        v1 (array):         The v1 values to use for vector correlations. (default: None)
        v2 (array):         The v2 values to use for vector correlations. (default: None)
        g1 (array):         The g1 values to use for shear correlations. (g1,g2 may
                            represent any spin-2 field.) (default: None)
        g2 (array):         The g2 values to use for shear correlations. (g1,g2 may
                            represent any spin-2 field.) (default: None)
        t1 (array):         The t1 values to use for trefoil (spin-3) correlations.
                            (default: None)
        t2 (array):         The t2 values to use for trefoil (spin-3) correlations.
                            (default: None)
        q1 (array):         The q1 values to use for quatrefoil (spin-4) correlations.
                            (default: None)
        q2 (array):         The q2 values to use for quatrefoil (spin-4) correlations.
                            (default: None)
        patch (array or int): Optionally, patch numbers to use for each object.
                            (default: None)

                            .. note::

                                This may also be an int if the entire catalog represents a
                                single patch.  If ``patch_centers`` is given, this will
                                select those items from the full input that correspond to
                                the given patch number.  Similarly if ``patch_col`` is
                                given.  If neither of these is given, then all items are
                                set to have the given patch number, and ``npatch`` is
                                required to set the total number of patches, of which this
                                catalog is a part.

        patch_centers (array or str): Alternative to setting patch by hand or using kmeans,
                            you may instead give patch_centers either as a file name or an
                            array from which the patches will be determined.
                            (default: None)
        file_type (str):    What kind of file is the input file.  Valid options are
                            'ASCII', 'FITS', 'HDF', or 'Parquet'. (default: if the
                            file_name extension starts with .fit, then use 'FITS', or with
                            .hdf, then use 'HDF', or with .par, then use 'Parquet', else
                            'ASCII')
        delimiter (str):    For ASCII files, what delimiter to use between values.
                            (default: None, which means any whitespace)
        comment_marker (str): For ASCII files, what token indicates a comment line.
                            (default: '#')
        first_row (int):    Which row to take as the first row to be used. (default: 1)
        last_row (int):     Which row to take as the last row to be used. (default: -1,
                            which means the last row in the file)
        every_nth (int):    Only use every nth row of the input catalog. (default: 1)
        npatch (int):       How many patches to split the catalog into (using kmeans if no
                            other patch information is provided) for the purpose of
                            jackknife variance or other options that involve running via
                            patches. (default: 1)  See also the example following this
                            parameter list.

                            .. note::

                                If the catalog has ra,dec,r positions, the patches will be
                                made using just ra,dec.  If ``patch`` is given, then this
                                sets the total number of patches that are relevant for the
                                area that was split into patches, which may include more
                                catalogs than just this one.

        kmeans_init (str):  If using kmeans to make patches, which init method to use.
                            cf. `Field.run_kmeans` (default: 'tree')
        kmeans_alt (bool):  If using kmeans to make patches, whether to use the alternate
                            kmeans algorithm. cf. `Field.run_kmeans` (default: False)
        x_col (str or int): The column to use for the x values.  An integer is only allowed
                            for ASCII files. (default: '0', which means not to read in this
                            column.  When reading from a file, either x_col and y_col are
                            required or ra_col and dec_col are required.)
        y_col (str or int): The column to use for the y values.  An integer is only allowed
                            for ASCII files. (default: '0', which means not to read in this
                            column.  When reading from a file, either x_col and y_col are
                            required or ra_col and dec_col are required.)
        z_col (str or int): The column to use for the z values.  An integer is only allowed
                            for ASCII files. (default: '0', which means not to read in this
                            column; invalid in conjunction with ra_col, dec_col.)
        ra_col (str or int): The column to use for the ra values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.  When reading from a file, either x_col and
                            y_col are required or ra_col and dec_col are required.)
        dec_col (str or int): The column to use for the dec values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.  When reading from a file, either x_col and
                            y_col are required or ra_col and dec_col are required.)
        r_col (str or int): The column to use for the r values.  An integer is only allowed
                            for ASCII files. (default: '0', which means not to read in this
                            column; invalid in conjunction with x_col, y_col.)
        x_units (str):      The units to use for the x values, given as a string.  Valid
                            options are arcsec, arcmin, degrees, hours, radians. (default:
                            radians, although with (x,y) positions, you can often just
                            ignore the units, and the output separations will be in
                            whatever units x and y are in.)
        y_units (str):      The units to use for the y values, given as a string.  Valid
                            options are arcsec, arcmin, degrees, hours, radians. (default:
                            radians, although with (x,y) positions, you can often just
                            ignore the units, and the output separations will be in
                            whatever units x and y are in.)
        ra_units (str):     The units to use for the ra values, given as a string.  Valid
                            options are arcsec, arcmin, degrees, hours, radians. (required
                            when using ra_col or providing ra directly)
        dec_units (str):    The units to use for the dec values, given as a string.  Valid
                            options are arcsec, arcmin, degrees, hours, radians. (required
                            when using dec_col or providing dec directly)
        k_col (str or int): The column to use for the kappa values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        z1_col (str or int): The column to use for the z1 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        z2_col (str or int): The column to use for the z2 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        v1_col (str or int): The column to use for the v1 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        v2_col (str or int): The column to use for the v2 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        g1_col (str or int): The column to use for the g1 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        g2_col (str or int): The column to use for the g2 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        t1_col (str or int): The column to use for the t1 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        t2_col (str or int): The column to use for the t2 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        q1_col (str or int): The column to use for the q1 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        q2_col (str or int): The column to use for the q2 values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        patch_col (str or int): The column to use for the patch numbers.  An integer is
                            only allowed for ASCII files. (default: '0', which means not to
                            read in this column.)
        w_col (str or int): The column to use for the weight values.  An integer is only
                            allowed for ASCII files. (default: '0', which means not to read
                            in this column.)
        wpos_col (str or int): The column to use for the position weight values.  An
                            integer is only allowed for ASCII files. (default: '0', which
                            means not to read in this column, in which case wpos=w.)
        flag_col (str or int): The column to use for the flag values.  An integer is only
                            allowed for ASCII files.  Any row with flag != 0 (or
                            technically flag & ~ok_flag != 0) will be given a weight of 0.
                            (default: '0', which means not to read in this column.)
        ignore_flag (int):  Which flags should be ignored. (default: all non-zero flags are
                            ignored.  Equivalent to ignore_flag = ~0.)
        ok_flag (int):      Which flags should be considered ok. (default: 0, i.e. all
                            non-zero flags are ignored.)
        allow_xyz (bool):   Whether to allow x,y,z values in conjunction with ra,dec.
                            Normally, it is an error to have both kinds of positions, but
                            if you know that the x,y,z values are consistent with the given
                            ra,dec values, it can save time to input them, rather than
                            calculate them using trig functions. (default: False)
        flip_z1 (bool):     Whether to flip the sign of the input z1 values.
                            (default: False)
        flip_z2 (bool):     Whether to flip the sign of the input z2 values.
                            (default: False)
        flip_v1 (bool):     Whether to flip the sign of the input v1 values.
                            (default: False)
        flip_v2 (bool):     Whether to flip the sign of the input v2 values.
                            (default: False)
        flip_g1 (bool):     Whether to flip the sign of the input g1 values.
                            (default: False)
        flip_g2 (bool):     Whether to flip the sign of the input g2 values.
                            (default: False)
        flip_t1 (bool):     Whether to flip the sign of the input t1 values.
                            (default: False)
        flip_t2 (bool):     Whether to flip the sign of the input t2 values.
                            (default: False)
        flip_q1 (bool):     Whether to flip the sign of the input q1 values.
                            (default: False)
        flip_q2 (bool):     Whether to flip the sign of the input q2 values.
                            (default: False)
        keep_zero_weight (bool): Whether to keep objects with wpos=0 in the catalog
                            (including any objects that indirectly get wpos=0 due to NaN
                            or flags), so they are included in ntot and also in npairs
                            calculations that use this Catalog, although of course they do
                            not contribute to the accumulated weight of pairs.
                            (default: False)
        save_patch_dir (str): If desired, when building patches from this Catalog, save
                            them as FITS files in the given directory for more efficient
                            loading when doing cross-patch correlations with the
                            ``low_mem`` option.
        ext (int/str):      For FITS/HDF files, which extension to read. (default: 1 for
                            fits, root for HDF)
        x_ext (int/str):    Which extension to use for the x values. (default: ext)
        y_ext (int/str):    Which extension to use for the y values. (default: ext)
        z_ext (int/str):    Which extension to use for the z values. (default: ext)
        ra_ext (int/str):   Which extension to use for the ra values. (default: ext)
        dec_ext (int/str):  Which extension to use for the dec values. (default: ext)
        r_ext (int/str):    Which extension to use for the r values. (default: ext)
        k_ext (int/str):    Which extension to use for the k values. (default: ext)
        z1_ext (int/str):   Which extension to use for the z1 values. (default: ext)
        z2_ext (int/str):   Which extension to use for the z2 values. (default: ext)
        v1_ext (int/str):   Which extension to use for the v1 values. (default: ext)
        v2_ext (int/str):   Which extension to use for the v2 values. (default: ext)
        g1_ext (int/str):   Which extension to use for the g1 values. (default: ext)
        g2_ext (int/str):   Which extension to use for the g2 values. (default: ext)
        t1_ext (int/str):   Which extension to use for the t1 values. (default: ext)
        t2_ext (int/str):   Which extension to use for the t2 values. (default: ext)
        q1_ext (int/str):   Which extension to use for the q1 values. (default: ext)
        q2_ext (int/str):   Which extension to use for the q2 values. (default: ext)
        patch_ext (int/str): Which extension to use for the patch numbers. (default: ext)
        w_ext (int/str):    Which extension to use for the w values. (default: ext)
        wpos_ext (int/str): Which extension to use for the wpos values. (default: ext)
        flag_ext (int/str): Which extension to use for the flag values. (default: ext)
        x_eval (str):       An eval string to use for the x values. (default: None)
        y_eval (str):       An eval string to use for the y values. (default: None)
        z_eval (str):       An eval string to use for the z values. (default: None)
        ra_eval (str):      An eval string to use for the ra values. (default: None)
        dec_eval (str):     An eval string to use for the dec values. (default: None)
        r_eval (str):       An eval string to use for the r values. (default: None)
        k_eval (str):       An eval string to use for the k values. (default: None)
        z1_eval (str):      An eval string to use for the z1 values. (default: None)
        z2_eval (str):      An eval string to use for the z2 values. (default: None)
        v1_eval (str):      An eval string to use for the v1 values. (default: None)
        v2_eval (str):      An eval string to use for the v2 values. (default: None)
        g1_eval (str):      An eval string to use for the g1 values. (default: None)
        g2_eval (str):      An eval string to use for the g2 values. (default: None)
        t1_eval (str):      An eval string to use for the t1 values. (default: None)
        t2_eval (str):      An eval string to use for the t2 values. (default: None)
        q1_eval (str):      An eval string to use for the q1 values. (default: None)
        q2_eval (str):      An eval string to use for the q2 values. (default: None)
        patch_eval (str):   An eval string to use for the patch numbers. (default: None)
        w_eval (str):       An eval string to use for the weight values. (default: None)
        wpos_eval (str):    An eval string to use for the position weight values.
                            (default: None)
        flag_eval (str):    An eval string to use for the flag values. (default: None)
        extra_cols (list):  A list of column names to read to be used for the quantities
                            that are calculated with eval. (default: None)
        verbose (int):      If no logger is provided, this will optionally specify a
                            logging level to use.

                            - 0 means no logging output
                            - 1 means to output warnings only (default)
                            - 2 means to output various progress information
                            - 3 means to output extensive debugging information

        log_file (str):     If no logger is provided, this will specify a file to write the
                            logging output. (default: None; i.e. output to standard output)
        split_method (str): How to split the cells in the tree when building the tree
                            structure.  Options are:

                            - mean: Use the arithmetic mean of the coordinate being split.
                              (default)
                            - median: Use the median of the coordinate being split.
                            - middle: Use the middle of the range; i.e. the average of the
                              minimum and maximum value.
                            - random: Use a random point somewhere in the middle two
                              quartiles of the range.

        cat_precision (int): The precision to use when writing a Catalog to an ASCII file.
                            This should be an integer, which specifies how many digits to
                            write. (default: 16)
        rng (np.Generator): If desired, a numpy.random.Generator or numpy.random.RandomState
                            instance to use for any random number generation (e.g. kmeans
                            patches). (default: None)
        num_threads (int):  How many OpenMP threads to use during the catalog load steps.
                            (default: use the number of cpu cores)

                            .. note::

                                This won't work if the system's C compiler cannot use
                                OpenMP (e.g. clang prior to version 3.7).
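
    For instance, assuming ``ra`` and ``dec`` arrays given in degrees, something like the
    following would split the catalog into 20 patches using kmeans::

        >>> cat = treecorr.Catalog(ra=ra, dec=dec, ra_units='deg', dec_units='deg',
        ...                        npatch=20)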
    """
    # Dict describing the valid kwarg parameters, what types they are, and a description:
    # Each value is a tuple with the following elements:
    #    type
    #    may_be_list
    #    default value
    #    list of valid values
    #    description
    _valid_params = {
        'file_type' : (str, True, None, ['ASCII', 'FITS', 'HDF', 'Parquet'],
                'The file type of the input files. The default is to use the file name '
                'extension.'),
        'delimiter' : (str, True, None, None,
                'The delimiter between values in an ASCII catalog. The default is any '
                'whitespace.'),
        'comment_marker' : (str, True, '#', None,
                'The first (non-whitespace) character of comment lines in an input ASCII '
                'catalog.'),
        'first_row' : (int, True, 1, None,
                'The first row to use from the input catalog'),
        'last_row' : (int, True, -1, None,
                'The last row to use from the input catalog. The default is to use all of '
                'them.'),
        'every_nth' : (int, True, 1, None,
                'Only use every nth row of the input catalog. The default is to use all of '
                'them.'),
        'x_col' : (str, True, '0', None,
                'Which column to use for x. Should be an integer for ASCII catalogs.'),
        'y_col' : (str, True, '0', None,
                'Which column to use for y. Should be an integer for ASCII catalogs.'),
        'z_col' : (str, True, '0', None,
                'Which column to use for z. Should be an integer for ASCII catalogs.'),
        'ra_col' : (str, True, '0', None,
                'Which column to use for ra. Should be an integer for ASCII catalogs.'),
        'dec_col' : (str, True, '0', None,
                'Which column to use for dec. Should be an integer for ASCII catalogs.'),
        'r_col' : (str, True, '0', None,
                'Which column to use for r. Only valid with ra,dec. ',
                'Should be an integer for ASCII catalogs.'),
        'x_units' : (str, True, None, coord.AngleUnit.valid_names,
                'The units of x values.'),
        'y_units' : (str, True, None, coord.AngleUnit.valid_names,
                'The units of y values.'),
        'ra_units' : (str, True, None, coord.AngleUnit.valid_names,
                'The units of ra values. Required when using ra_col.'),
        'dec_units' : (str, True, None, coord.AngleUnit.valid_names,
                'The units of dec values. Required when using dec_col.'),
        'k_col' : (str, True, '0', None,
                'Which column to use for kappa. Should be an integer for ASCII catalogs.'),
        'z1_col' : (str, True, '0', None,
                'Which column to use for z1. Should be an integer for ASCII catalogs.'),
        'z2_col' : (str, True, '0', None,
                'Which column to use for z2. Should be an integer for ASCII catalogs.'),
        'v1_col' : (str, True, '0', None,
                'Which column to use for v1. Should be an integer for ASCII catalogs.'),
        'v2_col' : (str, True, '0', None,
                'Which column to use for v2. Should be an integer for ASCII catalogs.'),
        'g1_col' : (str, True, '0', None,
                'Which column to use for g1. Should be an integer for ASCII catalogs.'),
        'g2_col' : (str, True, '0', None,
                'Which column to use for g2. Should be an integer for ASCII catalogs.'),
        't1_col' : (str, True, '0', None,
                'Which column to use for t1. Should be an integer for ASCII catalogs.'),
        't2_col' : (str, True, '0', None,
                'Which column to use for t2. Should be an integer for ASCII catalogs.'),
        'q1_col' : (str, True, '0', None,
                'Which column to use for q1. Should be an integer for ASCII catalogs.'),
        'q2_col' : (str, True, '0', None,
                'Which column to use for q2. Should be an integer for ASCII catalogs.'),
        'patch_col' : (str, True, '0', None,
                'Which column to use for patch numbers. Should be an integer for ASCII '
                'catalogs.'),
        'w_col' : (str, True, '0', None,
                'Which column to use for weight. Should be an integer for ASCII catalogs.'),
        'wpos_col' : (str, True, '0', None,
                'Which column to use for position weight. Should be an integer for ASCII '
                'catalogs.'),
        'flag_col' : (str, True, '0', None,
                'Which column to use for flag. Should be an integer for ASCII catalogs.'),
        'ignore_flag': (int, True, None, None,
                'Ignore objects with flag & ignore_flag != 0 (bitwise &)'),
        'ok_flag': (int, True, 0, None,
                'Ignore objects with flag & ~ok_flag != 0 (bitwise &, ~)'),
        'allow_xyz': (bool, True, False, None,
                'Whether to allow x,y,z inputs in conjunction with ra,dec'),
        'ext': (str, True, None, None,
                'Which extension/group in a fits/hdf file to use. Default=1 (fits), '
                'root (hdf)'),
        'x_ext': (str, True, None, None,
                'Which extension to use for the x_col. default is the global ext value.'),
        'y_ext': (str, True, None, None,
                'Which extension to use for the y_col. default is the global ext value.'),
        'z_ext': (str, True, None, None,
                'Which extension to use for the z_col. default is the global ext value.'),
        'ra_ext': (str, True, None, None,
                'Which extension to use for the ra_col. default is the global ext value.'),
        'dec_ext': (str, True, None, None,
                'Which extension to use for the dec_col. default is the global ext value.'),
        'r_ext': (str, True, None, None,
                'Which extension to use for the r_col. default is the global ext value.'),
        'k_ext': (str, True, None, None,
                'Which extension to use for the k_col. default is the global ext value.'),
        'z1_ext': (str, True, None, None,
                'Which extension to use for the z1_col. default is the global ext value.'),
        'z2_ext': (str, True, None, None,
                'Which extension to use for the z2_col. default is the global ext value.'),
        'v1_ext': (str, True, None, None,
                'Which extension to use for the v1_col. default is the global ext value.'),
        'v2_ext': (str, True, None, None,
                'Which extension to use for the v2_col. default is the global ext value.'),
        'g1_ext': (str, True, None, None,
                'Which extension to use for the g1_col. default is the global ext value.'),
        'g2_ext': (str, True, None, None,
                'Which extension to use for the g2_col. default is the global ext value.'),
        't1_ext': (str, True, None, None,
                'Which extension to use for the t1_col. default is the global ext value.'),
        't2_ext': (str, True, None, None,
                'Which extension to use for the t2_col. default is the global ext value.'),
        'q1_ext': (str, True, None, None,
                'Which extension to use for the q1_col. default is the global ext value.'),
        'q2_ext': (str, True, None, None,
                'Which extension to use for the q2_col. default is the global ext value.'),
        'patch_ext': (str, True, None, None,
                'Which extension to use for the patch_col. default is the global ext '
                'value.'),
        'w_ext': (str, True, None, None,
                'Which extension to use for the w_col. default is the global ext value.'),
        'wpos_ext': (str, True, None, None,
                'Which extension to use for the wpos_col. default is the global ext '
                'value.'),
        'flag_ext': (str, True, None, None,
                'Which extension to use for the flag_col. default is the global ext '
                'value.'),
        'x_eval' : (str, True, None, None, 'An eval string to use for x.'),
        'y_eval' : (str, True, None, None, 'An eval string to use for y.'),
        'z_eval' : (str, True, None, None, 'An eval string to use for z.'),
        'ra_eval' : (str, True, None, None, 'An eval string to use for ra.'),
        'dec_eval' : (str, True, None, None, 'An eval string to use for dec.'),
        'r_eval' : (str, True, None, None, 'An eval string to use for r.'),
        'k_eval' : (str, True, None, None, 'An eval string to use for kappa.'),
        'z1_eval' : (str, True, None, None, 'An eval string to use for z1.'),
        'z2_eval' : (str, True, None, None, 'An eval string to use for z2.'),
        'v1_eval' : (str, True, None, None, 'An eval string to use for v1.'),
        'v2_eval' : (str, True, None, None, 'An eval string to use for v2.'),
        'g1_eval' : (str, True, None, None, 'An eval string to use for g1.'),
        'g2_eval' : (str, True, None, None, 'An eval string to use for g2.'),
        't1_eval' : (str, True, None, None, 'An eval string to use for t1.'),
        't2_eval' : (str, True, None, None, 'An eval string to use for t2.'),
        'q1_eval' : (str, True, None, None, 'An eval string to use for q1.'),
        'q2_eval' : (str, True, None, None, 'An eval string to use for q2.'),
        'patch_eval' : (str, True, None, None, 'An eval string to use for patch numbers.'),
        'w_eval' : (str, True, None, None, 'An eval string to use for weight.'),
        'wpos_eval' : (str, True, None, None, 'An eval string to use for position weight.'),
        'flag_eval' : (str, True, None, None, 'An eval string to use for flag.'),
        'extra_cols': (list, False, None, None,
                'A list of extra column names to read for the eval calculations'),
        'flip_z1' : (bool, True, False, None, 'Whether to flip the sign of z1'),
        'flip_z2' : (bool, True, False, None, 'Whether to flip the sign of z2'),
        'flip_v1' : (bool, True, False, None, 'Whether to flip the sign of v1'),
        'flip_v2' : (bool, True, False, None, 'Whether to flip the sign of v2'),
        'flip_g1' : (bool, True, False, None, 'Whether to flip the sign of g1'),
        'flip_g2' : (bool, True, False, None, 'Whether to flip the sign of g2'),
        'flip_t1' : (bool, True, False, None, 'Whether to flip the sign of t1'),
        'flip_t2' : (bool, True, False, None, 'Whether to flip the sign of t2'),
        'flip_q1' : (bool, True, False, None, 'Whether to flip the sign of q1'),
        'flip_q2' : (bool, True, False, None, 'Whether to flip the sign of q2'),
        'vark': (float, True, None, None,
                'Variance of k values to use, rather than calculate directly'),
        'varz': (float, True, None, None,
                'Variance of z values to use, rather than calculate directly'),
        'varv': (float, True, None, None,
                'Variance of v values to use, rather than calculate directly'),
        'varg': (float, True, None, None,
                'Variance of g values to use, rather than calculate directly'),
        'vart': (float, True, None, None,
                'Variance of t values to use, rather than calculate directly'),
        'varq': (float, True, None, None,
                'Variance of q values to use, rather than calculate directly'),
        'keep_zero_weight' : (bool, False, False, None,
                'Whether to keep objects with zero weight in the catalog'),
        'npatch' : (int, False, None, None,
                'Number of patches to split the catalog into'),
        'kmeans_init' : (str, False, 'tree', ['tree','random','kmeans++'],
                'Which initialization method to use for kmeans when making patches'),
        'kmeans_alt' : (bool, False, False, None,
                'Whether to use the alternate kmeans algorithm when making patches'),
        'patch_centers' : (str, False, None, None,
                'File with patch centers to use to determine patches'),
        'save_patch_dir' : (str, False, None, None,
                'If desired, save the patches as FITS files in this directory.'),
        'verbose' : (int, False, 1, [0, 1, 2, 3],
                'How verbose the code should be during processing. ',
                '0 = Errors Only, 1 = Warnings, 2 = Progress, 3 = Debugging'),
        'log_file' : (str, False, None, None,
                'If desired, an output file for the logging output.',
                'The default is to write the output to stdout.'),
        'split_method' : (str, False, 'mean', ['mean', 'median', 'middle', 'random'],
                'Which method to use for splitting cells.'),
        'cat_precision' : (int, False, 16, None,
                'The number of digits after the decimal in the output.'),
    }

    eval_modules = ['numpy', 'numpy as np', 'math', 'coord']
    _aliases = {}
    _emitted_pandas_warning = False  # Only emit the warning once.  Set to True once we have.
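
    # The eval strings (e.g. k_eval, g1_eval) may use functions from any module listed in
    # eval_modules.  To make more modules available, one can append to this list before
    # building the Catalog.  A hypothetical sketch (the module choice and the SNR column
    # are placeholders, not part of TreeCorr):
    #
    #     >>> treecorr.Catalog.eval_modules.append('scipy.special')
    #     >>> cat = treecorr.Catalog('data.fits', ra_col='RA', dec_col='DEC',
    #     ...                        ra_units='deg', dec_units='deg',
    #     ...                        k_eval='scipy.special.erf(SNR)', extra_cols=['SNR'])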

    def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=False,
                 x=None, y=None, z=None, ra=None, dec=None, r=None, w=None, wpos=None,
                 flag=None, k=None, z1=None, z2=None, v1=None, v2=None, g1=None, g2=None,
                 t1=None, t2=None, q1=None, q2=None, patch=None, patch_centers=None,
                 rng=None, **kwargs):
        self.config = merge_config(config, kwargs, Catalog._valid_params, Catalog._aliases)
        self.orig_config = config.copy() if config is not None else {}
        if config and kwargs:
            self.orig_config.update(kwargs)
        self._num = num
        self._is_rand = is_rand

        if logger is not None:
            self.logger = logger
            self._logger_name = logger.name
        else:
            self._logger_name = 'treecorr.Catalog'
            self.logger = setup_logger(get(self.config,'verbose',int,1),
                                       self.config.get('log_file',None), self._logger_name)

        # Start with everything set to None.  Overwrite as appropriate.
        self._x = None
        self._y = None
        self._z = None
        self._ra = None
        self._dec = None
        self._r = None
        self._w = None
        self._wpos = None
        self._flag = None
        self._k = None
        self._z1 = None
        self._z2 = None
        self._v1 = None
        self._v2 = None
        self._g1 = None
        self._g2 = None
        self._t1 = None
        self._t2 = None
        self._q1 = None
        self._q2 = None
        self._patch = None
        self._field = lambda : None
        self._nontrivial_w = None
        self._single_patch = None
        self._nobj = None
        self._sumw = None
        self._sumw2 = None
        self._vark = None
        self._varz = None
        self._varv = None
        self._varg = None
        self._vart = None
        self._varq = None
        self._patches = None
        self._centers = None
        self._computed_centers = None
        self._rng = rng

        first_row = get_from_list(self.config,'first_row',num,int,1)
        if first_row < 1:
            raise ValueError("first_row should be >= 1")
        last_row = get_from_list(self.config,'last_row',num,int,-1)
        if last_row > 0 and last_row < first_row:
            raise ValueError("last_row should be >= first_row")
        if last_row > 0:
            self.end = last_row
        else:
            self.end = None
        if first_row > 1:
            self.start = first_row-1
        else:
            self.start = 0
        self.every_nth = get_from_list(self.config,'every_nth',num,int,1)
        if self.every_nth < 1:
            raise ValueError("every_nth should be >= 1")

        try:
            self._single_patch = int(patch)
        except TypeError:
            pass
        else:
            patch = None

        if patch_centers is None and 'patch_centers' in self.config:
            # file name version may be in a config dict, rather than kwarg.
            patch_centers = get(self.config,'patch_centers',str)

        if patch_centers is not None:
            if patch is not None or self.config.get('patch_col',0) not in (0,'0'):
                raise ValueError("Cannot provide both patch and patch_centers")
            if isinstance(patch_centers, np.ndarray):
                self._centers = patch_centers
            else:
                self._centers = self.read_patch_centers(patch_centers)
            self._npatch = self._centers.shape[0]
            if self.config.get('npatch', self._npatch) != self._npatch:
                raise ValueError("npatch is incompatible with provided centers")
        elif 'npatch' in self.config:
            self._npatch = get(self.config,'npatch',int)
            if self._npatch < 1:
                raise ValueError("npatch must be >= 1")
        elif self.config.get('patch_col',0) not in (0,'0'):
            self._npatch = None  # Mark that we need to finish loading to figure out npatch.
        elif self._single_patch:
            raise ValueError("Either npatch or patch_centers is required when providing "
                             "an integer patch value.")
        else:
            self._npatch = 1  # We might yet change this, but it will be correct at end of init.
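
        # At this point self._npatch has been resolved from one of four sources:
        #   1. patch_centers (array or file name): npatch = the number of centers.
        #   2. an explicit npatch in the config: patches will be built later
        #      (via kmeans in _finish_input, unless patch numbers were given directly).
        #   3. a patch_col in the input file: npatch is unknown until the file is read.
        #   4. none of the above: a single patch (npatch = 1).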
        self.save_patch_dir = self.config.get('save_patch_dir',None)
        allow_xyz = self.config.get('allow_xyz', False)

        # First style -- read from a file
        if file_name is not None:
            if any([v is not None
                    for v in [x,y,z,ra,dec,r,k,z1,z2,v1,v2,g1,g2,t1,t2,q1,q2,
                              patch,w,wpos,flag]]):
                raise TypeError("Vectors may not be provided when file_name is provided.")
            self.file_name = file_name
            self.name = file_name
            if self._single_patch is not None:
                self.name += " patch " + str(self._single_patch)

            # Figure out which file type the catalog is
            file_type = get_from_list(self.config,'file_type',num)
            file_type = parse_file_type(file_type, file_name, output=False,
                                        logger=self.logger)
            if file_type == 'FITS':
                self.reader = FitsReader(file_name)
                self._check_file(file_name, self.reader, num, is_rand)
            elif file_type == 'HDF':
                self.reader = HdfReader(file_name)
                self._check_file(file_name, self.reader, num, is_rand)
            elif file_type == 'PARQUET':
                self.reader = ParquetReader(file_name)
                self._check_file(file_name, self.reader, num, is_rand)
            else:
                delimiter = self.config.get('delimiter',None)
                comment_marker = self.config.get('comment_marker','#')
                try:
                    self.reader = PandasReader(file_name, delimiter=delimiter,
                                               comment_marker=comment_marker)
                except ImportError:
                    self._pandas_warning()
                    self.reader = AsciiReader(file_name, delimiter=delimiter,
                                              comment_marker=comment_marker)
                self._check_file(file_name, self.reader, num, is_rand)
            self.file_type = file_type

        # Second style -- pass in the vectors directly
        else:
            self.file_type = None
            if x is not None or y is not None:
                if x is None or y is None:
                    raise TypeError("x and y must both be provided")
                if (ra is not None or dec is not None) and not allow_xyz:
                    raise TypeError("ra and dec may not be provided with x,y")
                if r is not None and not allow_xyz:
                    raise TypeError("r may not be provided with x,y")
            if ra is not None or dec is not None:
                if ra is None or dec is None:
                    raise TypeError("ra and dec must both be provided")
            if z1 is not None or z2 is not None:
                if z1 is None or z2 is None:
                    raise TypeError("z1 and z2 must both be provided")
            if v1 is not None or v2 is not None:
                if v1 is None or v2 is None:
                    raise TypeError("v1 and v2 must both be provided")
            if g1 is not None or g2 is not None:
                if g1 is None or g2 is None:
                    raise TypeError("g1 and g2 must both be provided")
            if t1 is not None or t2 is not None:
                if t1 is None or t2 is None:
                    raise TypeError("t1 and t2 must both be provided")
            if q1 is not None or q2 is not None:
                if q1 is None or q2 is None:
                    raise TypeError("q1 and q2 must both be provided")
            self.file_name = None
            self.name = ''
            if self._single_patch is not None:
                self.name = "patch " + str(self._single_patch)
            self._x = self.makeArray(x,'x')
            self._y = self.makeArray(y,'y')
            self._z = self.makeArray(z,'z')
            self._ra = self.makeArray(ra,'ra')
            self._dec = self.makeArray(dec,'dec')
            self._r = self.makeArray(r,'r')
            self._w = self.makeArray(w,'w')
            self._wpos = self.makeArray(wpos,'wpos')
            self._flag = self.makeArray(flag,'flag',int)
            self._k = self.makeArray(k,'k')
            self._z1 = self.makeArray(z1,'z1')
            self._z2 = self.makeArray(z2,'z2')
            self._v1 = self.makeArray(v1,'v1')
            self._v2 = self.makeArray(v2,'v2')
            self._g1 = self.makeArray(g1,'g1')
            self._g2 = self.makeArray(g2,'g2')
            self._t1 = self.makeArray(t1,'t1')
            self._t2 = self.makeArray(t2,'t2')
            self._q1 = self.makeArray(q1,'q1')
            self._q2 = self.makeArray(q2,'q2')
            self._patch = self.makeArray(patch,'patch',int)
            if self._patch is not None:
                self._set_npatch()
            if self._x is not None:
                self._apply_xyz_units()
            if self._ra is not None:
                self._apply_radec_units()

            # Check that all columns have the same length.  (A mismatch is impossible for
            # file input, so this is only needed here.)
            if self._x is not None:
                ntot = len(self._x)
                if len(self._y) != ntot:
                    raise ValueError("x and y have different numbers of elements")
            else:
                ntot = len(self._ra)
                if len(self._dec) != ntot:
                    raise ValueError("ra and dec have different numbers of elements")
            if self._z is not None and len(self._z) != ntot:
                raise ValueError("z has the wrong number of elements")
            if self._r is not None and len(self._r) != ntot:
                raise ValueError("r has the wrong number of elements")
            if self._w is not None and len(self._w) != ntot:
                raise ValueError("w has the wrong number of elements")
            if self._wpos is not None and len(self._wpos) != ntot:
                raise ValueError("wpos has the wrong number of elements")
            if self._k is not None and len(self._k) != ntot:
                raise ValueError("k has the wrong number of elements")
            if self._z1 is not None and len(self._z1) != ntot:
                raise ValueError("z1 has the wrong number of elements")
            if self._z2 is not None and len(self._z2) != ntot:
                raise ValueError("z2 has the wrong number of elements")
            if self._v1 is not None and len(self._v1) != ntot:
                raise ValueError("v1 has the wrong number of elements")
            if self._v2 is not None and len(self._v2) != ntot:
                raise ValueError("v2 has the wrong number of elements")
            if self._g1 is not None and len(self._g1) != ntot:
                raise ValueError("g1 has the wrong number of elements")
            if self._g2 is not None and len(self._g2) != ntot:
                raise ValueError("g2 has the wrong number of elements")
            if self._t1 is not None and len(self._t1) != ntot:
                raise ValueError("t1 has the wrong number of elements")
            if self._t2 is not None and len(self._t2) != ntot:
                raise ValueError("t2 has the wrong number of elements")
            if self._q1 is not None and len(self._q1) != ntot:
                raise ValueError("q1 has the wrong number of elements")
            if self._q2 is not None and len(self._q2) != ntot:
                raise ValueError("q2 has the wrong number of elements")
            if self._patch is not None and len(self._patch) != ntot:
                raise ValueError("patch has the wrong number of elements")
            if ntot == 0:
                raise ValueError("Input arrays have zero length")

        if (x is not None or self.config.get('x_col','0') not in [0,'0']
                or self.config.get('x_eval') is not None):
            if 'x_units' in self.config and 'y_units' not in self.config:
                raise TypeError("x_units specified without specifying y_units")
            if 'y_units' in self.config and 'x_units' not in self.config:
                raise TypeError("y_units specified without specifying x_units")
        else:
            if 'x_units' in self.config:
                raise TypeError("x_units is invalid without x")
            if 'y_units' in self.config:
                raise TypeError("y_units is invalid without y")
        if (ra is not None or self.config.get('ra_col','0') not in [0,'0']
                or self.config.get('ra_eval') is not None):
            if not self.config.get('ra_units',None):
                raise TypeError("ra_units is required when using ra, dec")
            if not self.config.get('dec_units',None):
                raise TypeError("dec_units is required when using ra, dec")
        else:
            if 'ra_units' in self.config:
                raise TypeError("ra_units is invalid without ra")
            if 'dec_units' in self.config:
                raise TypeError("dec_units is invalid without dec")
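
        # If any of the variances were given explicitly in the config, use those rather
        # than computing them from the data later.  The corresponding means cannot be
        # inferred from a variance alone, so they are set to 0.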
        if self.config.get('vark', None) is not None:
            self._vark = self.config['vark']
            self._meank = 0.
            self._altmeank = 0.
        if self.config.get('varz', None) is not None:
            self._varz = self._varz1 = self._varz2 = self.config['varz']
            self._meanz1 = self._meanz2 = 0.
            self._altmeanz1 = self._altmeanz2 = 0.
        if self.config.get('varv', None) is not None:
            self._varv = self._varv1 = self._varv2 = self.config['varv']
            self._meanv1 = self._meanv2 = 0.
            self._altmeanv1 = self._altmeanv2 = 0.
        if self.config.get('varg', None) is not None:
            self._varg = self._varg1 = self._varg2 = self.config['varg']
            self._meang1 = self._meang2 = 0.
            self._altmeang1 = self._altmeang2 = 0.
        if self.config.get('vart', None) is not None:
            self._vart = self._vart1 = self._vart2 = self.config['vart']
            self._meant1 = self._meant2 = 0.
            self._altmeant1 = self._altmeant2 = 0.
        if self.config.get('varq', None) is not None:
            self._varq = self._varq1 = self._varq2 = self.config['varq']
            self._meanq1 = self._meanq2 = 0.
            self._altmeanq1 = self._altmeanq2 = 0.

        if file_name is None:
            # For vector input option, can finish up now.
            if self._single_patch is not None:
                self._select_patch(self._single_patch)
            self._finish_input()
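
    # Lazy loading: when constructed from a file, the column data are not read until first
    # needed.  Each accessor below calls load(), so e.g. cat.x triggers the read if the
    # file has not been loaded yet.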

    @property
    def loaded(self):
        # _x gets set regardless of whether input used x,y or ra,dec, so the state of this
        # attribute is a good sentinel for whether the file has been loaded yet.
        return self._x is not None

    @property
    def x(self):
        self.load()
        return self._x

    @property
    def y(self):
        self.load()
        return self._y

    @property
    def z(self):
        self.load()
        return self._z

    @property
    def ra(self):
        self.load()
        return self._ra

    @property
    def dec(self):
        self.load()
        return self._dec

    @property
    def r(self):
        self.load()
        return self._r

    @property
    def w(self):
        self.load()
        return self._w

    @property
    def wpos(self):
        self.load()
        return self._wpos

    @property
    def k(self):
        self.load()
        return self._k

    @property
    def z1(self):
        self.load()
        return self._z1

    @property
    def z2(self):
        self.load()
        return self._z2

    @property
    def v1(self):
        self.load()
        return self._v1

    @property
    def v2(self):
        self.load()
        return self._v2

    @property
    def g1(self):
        self.load()
        return self._g1

    @property
    def g2(self):
        self.load()
        return self._g2

    @property
    def t1(self):
        self.load()
        return self._t1

    @property
    def t2(self):
        self.load()
        return self._t2

    @property
    def q1(self):
        self.load()
        return self._q1

    @property
    def q2(self):
        self.load()
        return self._q2

    @property
    def npatch(self):
        if self._npatch is None:
            self.load()
        return self._npatch

    @property
    def patch(self):
        if self._single_patch is not None:
            return self._single_patch
        else:
            self.load()
            return self._patch

    @property
    def patches(self):
        return self.get_patches()

    @property
    def patch_centers(self):
        return self.get_patch_centers()

    def _calculate_weighted_var(self, k):
        # Compute the weighted mean and variance of a column:
        #     meank = sum(w k) / sum(w)
        #     vark  = sum(w^2 (k - meank)^2) / sum(w)
        if k is not None:
            if self.nontrivial_w:
                use = self.w != 0
                meank = np.sum(self.w[use] * k[use]) / self.sumw
                # "alt" means weighted by w^2, rather than w, which we also need
                # when building this back up from multiple catalogs.
                altmeank = np.sum(self.w[use]**2 * k[use]) / self.sumw2
                vark = np.sum(self.w[use]**2 * (k[use]-meank)**2) / self.sumw
            else:
                meank = altmeank = np.mean(k)
                vark = np.sum((k-meank)**2) / self.nobj
        else:
            meank = altmeank = vark = 0.
        return meank, altmeank, vark

    @property
    def vark(self):
        if self._vark is None:
            self._meank, self._altmeank, self._vark = self._calculate_weighted_var(self.k)
        return self._vark

    @property
    def varz(self):
        if self._varz is None:
            self._meanz1, self._altmeanz1, self._varz1 = self._calculate_weighted_var(self.z1)
            self._meanz2, self._altmeanz2, self._varz2 = self._calculate_weighted_var(self.z2)
            self._varz = (self._varz1 + self._varz2)/2
        return self._varz

    @property
    def varv(self):
        if self._varv is None:
            self._meanv1, self._altmeanv1, self._varv1 = self._calculate_weighted_var(self.v1)
            self._meanv2, self._altmeanv2, self._varv2 = self._calculate_weighted_var(self.v2)
            self._varv = (self._varv1 + self._varv2)/2
        return self._varv

    @property
    def varg(self):
        if self._varg is None:
            self._meang1, self._altmeang1, self._varg1 = self._calculate_weighted_var(self.g1)
            self._meang2, self._altmeang2, self._varg2 = self._calculate_weighted_var(self.g2)
            self._varg = (self._varg1 + self._varg2)/2
        return self._varg

    @property
    def vart(self):
        if self._vart is None:
            self._meant1, self._altmeant1, self._vart1 = self._calculate_weighted_var(self.t1)
            self._meant2, self._altmeant2, self._vart2 = self._calculate_weighted_var(self.t2)
            self._vart = (self._vart1 + self._vart2)/2
        return self._vart

    @property
    def varq(self):
        if self._varq is None:
            self._meanq1, self._altmeanq1, self._varq1 = self._calculate_weighted_var(self.q1)
            self._meanq2, self._altmeanq2, self._varq2 = self._calculate_weighted_var(self.q2)
            self._varq = (self._varq1 + self._varq2)/2
        return self._varq

    @property
    def nontrivial_w(self):
        if self._nontrivial_w is None:
            self.load()
        return self._nontrivial_w

    @property
    def ntot(self):
        return len(self.x)

    @property
    def nobj(self):
        if self._nobj is None:
            if self.nontrivial_w:
                use = self._w != 0
                self._nobj = np.sum(use)
            else:
                self._nobj = self.ntot
        return self._nobj

    @property
    def sumw(self):
        if self._sumw is None:
            self.load()
        return self._sumw

    @property
    def sumw2(self):
        if self._sumw2 is None:
            if self.nontrivial_w:
                self._sumw2 = np.sum(self.w**2)
            else:
                self._sumw2 = self.ntot
        return self._sumw2

    @property
    def coords(self):
        if self.ra is not None:
            if self.r is None:
                return 'spherical'
            else:
                return '3d'
        else:
            if self.z is None:
                return 'flat'
            else:
                return '3d'

    def _get_center_size(self):
        # The mean position of the catalog and the maximum distance of any object from it.
        if not hasattr(self, '_cen_s'):
            mx = np.mean(self.x)
            my = np.mean(self.y)
            mz = 0
            dsq = (self.x - mx)**2 + (self.y - my)**2
            if self.z is not None:
                mz = np.mean(self.z)
                dsq += (self.z - mz)**2
            s = np.max(dsq)**0.5
            self._cen_s = (mx, my, mz, s)
        return self._cen_s
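
    # A quick numerical check of _calculate_weighted_var above (hypothetical values):
    #
    #     >>> w = np.array([1., 1., 2.])
    #     >>> k = np.array([0.1, 0.3, 0.2])
    #     >>> meank = np.sum(w*k) / np.sum(w)                  # = 0.2
    #     >>> vark = np.sum(w**2 * (k-meank)**2) / np.sum(w)   # = 0.005
    #
    # This is the sum(w^2 (k-<k>)^2)/sum(w) quantity described in the class docstring,
    # i.e. roughly <w> Var(k), which is the right quantity for var_method='shot'.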

    def _finish_input(self):
        # Finish processing the data based on given inputs.

        # Apply flips if requested
        flip_z1 = get_from_list(self.config,'flip_z1',self._num,bool,False)
        flip_z2 = get_from_list(self.config,'flip_z2',self._num,bool,False)
        if flip_z1:
            self.logger.info("   Flipping sign of z1.")
            self._z1 = -self._z1
        if flip_z2:
            self.logger.info("   Flipping sign of z2.")
            self._z2 = -self._z2

        flip_v1 = get_from_list(self.config,'flip_v1',self._num,bool,False)
        flip_v2 = get_from_list(self.config,'flip_v2',self._num,bool,False)
        if flip_v1:
            self.logger.info("   Flipping sign of v1.")
            self._v1 = -self._v1
        if flip_v2:
            self.logger.info("   Flipping sign of v2.")
            self._v2 = -self._v2

        flip_g1 = get_from_list(self.config,'flip_g1',self._num,bool,False)
        flip_g2 = get_from_list(self.config,'flip_g2',self._num,bool,False)
        if flip_g1:
            self.logger.info("   Flipping sign of g1.")
            self._g1 = -self._g1
        if flip_g2:
            self.logger.info("   Flipping sign of g2.")
            self._g2 = -self._g2

        flip_t1 = get_from_list(self.config,'flip_t1',self._num,bool,False)
        flip_t2 = get_from_list(self.config,'flip_t2',self._num,bool,False)
        if flip_t1:
            self.logger.info("   Flipping sign of t1.")
            self._t1 = -self._t1
        if flip_t2:
            self.logger.info("   Flipping sign of t2.")
            self._t2 = -self._t2

        flip_q1 = get_from_list(self.config,'flip_q1',self._num,bool,False)
        flip_q2 = get_from_list(self.config,'flip_q2',self._num,bool,False)
        if flip_q1:
            self.logger.info("   Flipping sign of q1.")
            self._q1 = -self._q1
        if flip_q2:
            self.logger.info("   Flipping sign of q2.")
            self._q2 = -self._q2

        # Convert the flag to a weight
        if self._flag is not None:
            if 'ignore_flag' in self.config:
                ignore_flag = get_from_list(self.config,'ignore_flag',self._num,int)
            else:
                ok_flag = get_from_list(self.config,'ok_flag',self._num,int,0)
                # e.g. ok_flag=3 means flag values whose set bits are all within the low
                # two bits (i.e. flag values 1, 2, 3) are acceptable; any other bit being
                # set gives the object zero weight.
                ignore_flag = ~ok_flag
            # If we don't already have a weight column, make one with all values = 1.
            if self._w is None:
                self._w = np.ones_like(self._flag, dtype=float)
            self._w[(self._flag & ignore_flag)!=0] = 0
            if self._wpos is not None:
                self._wpos[(self._flag & ignore_flag)!=0] = 0
            self.logger.debug('Applied flag')

        # Check for NaN's:
        self.checkForNaN(self._x,'x')
        self.checkForNaN(self._y,'y')
        self.checkForNaN(self._z,'z')
        self.checkForNaN(self._ra,'ra')
        self.checkForNaN(self._dec,'dec')
        self.checkForNaN(self._r,'r')
        self.checkForNaN(self._k,'k')
        self.checkForNaN(self._z1,'z1')
        self.checkForNaN(self._z2,'z2')
        self.checkForNaN(self._v1,'v1')
        self.checkForNaN(self._v2,'v2')
        self.checkForNaN(self._g1,'g1')
        self.checkForNaN(self._g2,'g2')
        self.checkForNaN(self._t1,'t1')
        self.checkForNaN(self._t2,'t2')
        self.checkForNaN(self._q1,'q1')
        self.checkForNaN(self._q2,'q2')
        self.checkForNaN(self._w,'w')
        self.checkForNaN(self._wpos,'wpos')

        # If using ra/dec, generate x,y,z
        # Note: This also makes self.ntot work properly.
        self._generate_xyz()

        # Copy w to wpos if necessary (Do this after checkForNaN's, since this may set some
        # entries to have w=0.)
        if self._wpos is None:
            self.logger.debug('Using w for wpos')
        else:
            # Check that any wpos == 0 points also have w == 0
            if np.any(self._wpos == 0.):
                if self._w is None:
                    self.logger.warning('Some wpos values are zero, setting w=0 for these '
                                        'points.')
                    self._w = np.ones((self.ntot), dtype=float)
                else:
                    if np.any(self._w[self._wpos == 0.] != 0.):
                        self.logger.error('Some wpos values = 0 but have w!=0. This is '
                                          'invalid.\nSetting w=0 for these points.')
                self._w[self._wpos == 0.] = 0.
        if self._w is not None:
            self._nontrivial_w = True
            self._sumw = np.sum(self._w)
            if self._sumw == 0:
                raise ValueError("Catalog has invalid sumw == 0")
        else:
            self._nontrivial_w = False
            self._sumw = self.ntot
            # Make w all 1s to simplify the use of w later in code.
            self._w = np.ones((self.ntot), dtype=float)

        keep_zero_weight = get(self.config,'keep_zero_weight',bool,False)
        if self._nontrivial_w and not keep_zero_weight:
            wpos = self._wpos if self._wpos is not None else self._w
            if np.any(wpos == 0):
                self.select(np.where(wpos != 0)[0])

        if self._single_patch is not None or self._patch is not None:
            # Easier to get these options out of the way first.
            pass
        elif self._centers is not None:
            if ((self.coords == 'flat' and self._centers.shape[1] != 2) or
                    (self.coords != 'flat' and self._centers.shape[1] != 3)):
                raise ValueError("Centers array has wrong shape.")
            self._assign_patches()
            self.logger.info("Assigned patch numbers according to %d centers",self._npatch)
        elif self._npatch is not None and self._npatch != 1:
            init = get(self.config,'kmeans_init',str,'tree')
            alt = get(self.config,'kmeans_alt',bool,False)
            max_top = int.bit_length(self._npatch)-1
            c = 'spherical' if self._ra is not None else self.coords
            field = self.getNField(max_top=max_top, coords=c)
            self.logger.info("Finding %d patches using kmeans.",self._npatch)
            self._patch, self._centers = field.run_kmeans(self._npatch, init=init, alt=alt)
            # Clear the cached NField, since we will almost certainly not want this
            # particular one again, even if doing N-based correlations (since max_top, etc.
            # is almost certainly going to be different).
            self.nfields.clear()

        self.logger.info("   nobj = %d",self.nobj)
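    # A hedged sketch of the zero-weight behavior handled above (the toy arrays here
    # are illustrative, not from the original source).  By default, rows with w=0 are
    # removed via select(); keep_zero_weight=True retains them:
    #
    #     >>> cat = treecorr.Catalog(x=[0., 1., 2.], y=[0., 0., 0.], w=[1., 0., 1.])
    #     >>> cat.ntot
    #     2
    #     >>> cat = treecorr.Catalog(x=[0., 1., 2.], y=[0., 0., 0.], w=[1., 0., 1.],
    #     ...                        keep_zero_weight=True)
    #     >>> cat.ntot
    #     3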
    @classmethod
    def combine(cls, cat_list, *, mask_list=None, low_mem=False):
        """Combine several Catalogs into a single larger Catalog.

        If desired, one can also specify a mask for each of the input catalogs, which will
        select just a portion of the rows in that catalog.

        All the Catalogs must have the same columns defined (e.g. ra, dec, x, y, k, g1, g2,
        etc.)

        Parameters:
            cat_list:       A list of Catalog instances to combine.
            mask_list:      (optional) Which objects to take from each Catalog.  If given, it
                            must be a list of the same length as cat_list.  (default: None)
            low_mem (bool): Whether to try to leave the catalogs in cat_list unloaded if they
                            started out that way to keep total memory down.  (default: False)

        Returns:
            combined_cat
        """
        if len(cat_list) == 0:
            raise ValueError("cat_list cannot be empty")
        if mask_list is not None and len(mask_list) != len(cat_list):
            raise ValueError("mask_list is not the same length as cat_list")

        # Use the characteristics of the first catalog to decide which columns to generate.
        cat = cat_list[0]
        loaded = cat.loaded
        cat.load()
        check_wpos = cat._wpos if cat._wpos is not None else cat._w
        kwargs = dict(keep_zero_weight=np.any(check_wpos==0))
        if cat.ra is not None:
            kwargs['ra_units'] = 'rad'
            kwargs['dec_units'] = 'rad'
            kwargs['allow_xyz'] = True
        keys = []
        for key in ['x', 'y', 'z', 'ra', 'dec', 'r', 'w', 'wpos', 'k',
                    'z1', 'z2', 'v1', 'v2', 'g1', 'g2', 't1', 't2', 'q1', 'q2']:
            if getattr(cat, key) is not None:
                a = getattr(cat,key)
                if mask_list is not None:
                    a = a[mask_list[0]]
                # Start these as lists, which we'll concatenate once we have them all.
                kwargs[key] = [a]
                keys.append(key)
        if not loaded and low_mem:
            cat.unload()

        # Add data from the rest of the catalogs to the array lists.
        for i,cat in enumerate(cat_list):
            if i == 0: continue
            loaded = cat.loaded
            cat.load()
            for key in keys:
                # These are now just the ones that were in cat0.
                if getattr(cat, key) is None:
                    raise ValueError("Column %s not found in cat %d"%(key,i))
                a = getattr(cat,key)
                if mask_list is not None:
                    a = a[mask_list[i]]
                kwargs[key].append(a)
            if not loaded and low_mem:
                cat.unload()

        # Concatenate the arrays.
        for key in keys:
            kwargs[key] = np.concatenate(kwargs[key])

        return Catalog(**kwargs)
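    # A minimal usage sketch for `combine` (cat1, cat2, and the mask arrays are
    # hypothetical, not from the original source):
    #
    #     >>> cat1 = treecorr.Catalog(x=x1, y=y1, k=k1)
    #     >>> cat2 = treecorr.Catalog(x=x2, y=y2, k=k2)
    #     >>> full_cat = treecorr.Catalog.combine([cat1, cat2],
    #     ...                                     mask_list=[k1 > 0, k2 > 0])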
    def _assign_patches(self):
        # This is equivalent to the following:
        #     field = self.getNField()
        #     self._patch = field.kmeans_assign_patches(self._centers)
        # However, when the field is not already created, it's faster to just run through
        # all the points directly and assign which one is closest.
        self._patch = np.empty(self.ntot, dtype=int)
        centers = np.ascontiguousarray(self._centers)
        set_omp_threads(self.config.get('num_threads',None))
        zx = self.z if self.z is not None else np.array([])
        _treecorr.QuickAssign(centers, self.npatch, self.x, self.y, zx, self._patch)

    def _set_npatch(self):
        npatch = max(self._patch) + 1
        if self._npatch not in [None, 1] and npatch > self._npatch:
            # Note: it's permissible for self._npatch to be larger, but not smaller.
            raise ValueError("npatch is incompatible with provided patch numbers")
        self._npatch = npatch
        self.logger.info("Assigned patch numbers 0..%d",self._npatch-1)

    def _get_patch_index(self, single_patch):
        if self._patch is not None:
            # This is straightforward.  Just select the rows with patch == single_patch.
            use = np.where(self._patch == single_patch)[0]
        elif self._centers is not None:
            self._generate_xyz()
            use = np.empty(self.ntot, dtype=int)
            npatch = self._centers.shape[0]
            centers = np.ascontiguousarray(self._centers)
            if self._z is None:
                assert centers.shape[1] == 2
            else:
                assert centers.shape[1] == 3
            set_omp_threads(self.config.get('num_threads',None))
            zx = self._z if self._z is not None else np.array([])
            _treecorr.SelectPatch(single_patch, centers, self.npatch, self._x, self._y, zx, use)
            use = np.where(use)[0]
        else:
            use = slice(None)  # Which ironically means use all. :)
        return use

    def _apply_radec_units(self):
        self.ra_units = get_from_list(self.config,'ra_units',self._num)
        self.dec_units = get_from_list(self.config,'dec_units',self._num)
        self._ra *= self.ra_units
        self._dec *= self.dec_units

    def _apply_xyz_units(self):
        self.x_units = get_from_list(self.config,'x_units',self._num,str,'radians')
        self.y_units = get_from_list(self.config,'y_units',self._num,str,'radians')
        self._x *= self.x_units
        self._y *= self.y_units

    def _generate_xyz(self):
        if self._x is None:
            assert self._y is None
            assert self._z is None
            assert self._ra is not None
            assert self._dec is not None
            ntot = len(self._ra)
            self._x = np.empty(ntot, dtype=float)
            self._y = np.empty(ntot, dtype=float)
            self._z = np.empty(ntot, dtype=float)
            set_omp_threads(self.config.get('num_threads',None))
            rx = self._r if self._r is not None else np.array([])
            _treecorr.GenerateXYZ(self._x, self._y, self._z, self._ra, self._dec, rx)
            self.x_units = self.y_units = 1.

    def _select_patch(self, single_patch):
        # Trim the catalog to only include a single patch.
        # Note: This is slightly inefficient in that it reads the whole catalog first
        # and then removes all but one patch.  But that's easier for now than figuring out
        # which items to remove along the way based on the patch_centers.
        indx = self._get_patch_index(single_patch)
        self._patch = None
        self.select(indx)
    def select(self, indx):
        """Trim the catalog to only include those objects with the given indices.

        Parameters:
            indx:   A numpy array of index values to keep.
        """
        if type(indx) == slice and indx == slice(None):
            return
        self._x = self._x[indx] if self._x is not None else None
        self._y = self._y[indx] if self._y is not None else None
        self._z = self._z[indx] if self._z is not None else None
        self._ra = self._ra[indx] if self._ra is not None else None
        self._dec = self._dec[indx] if self._dec is not None else None
        self._r = self._r[indx] if self._r is not None else None
        self._w = self._w[indx] if self._w is not None else None
        self._wpos = self._wpos[indx] if self._wpos is not None else None
        self._k = self._k[indx] if self._k is not None else None
        self._z1 = self._z1[indx] if self._z1 is not None else None
        self._z2 = self._z2[indx] if self._z2 is not None else None
        self._v1 = self._v1[indx] if self._v1 is not None else None
        self._v2 = self._v2[indx] if self._v2 is not None else None
        self._g1 = self._g1[indx] if self._g1 is not None else None
        self._g2 = self._g2[indx] if self._g2 is not None else None
        self._t1 = self._t1[indx] if self._t1 is not None else None
        self._t2 = self._t2[indx] if self._t2 is not None else None
        self._q1 = self._q1[indx] if self._q1 is not None else None
        self._q2 = self._q2[indx] if self._q2 is not None else None
        self._patch = self._patch[indx] if self._patch is not None else None
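    # For instance, one plausible use of `select` (illustrative only, not from the
    # original source) is to trim a catalog to its positively weighted rows:
    #
    #     >>> cat.select(np.where(cat.w > 0)[0])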
    def makeArray(self, col, col_str, dtype=float):
        """Turn the input column into a numpy array if it wasn't already.
        Also make sure the input is 1-d.

        Parameters:
            col (array-like):   The input column to be converted into a numpy array.
            col_str (str):      The name of the column.  Used only as information in logging
                                output.
            dtype (type):       The dtype for the returned array.  (default: float)

        Returns:
            The column converted to a 1-d numpy array.
        """
        if col is not None:
            col = np.array(col,dtype=dtype)
            if len(col.shape) != 1:
                s = col.shape
                col = col.reshape(-1)
                self.logger.warning("Warning: Input %s column was not 1-d.\n"%col_str +
                                    "         Reshaping from %s to %s"%(s,col.shape))
            col = np.ascontiguousarray(col[self.start:self.end:self.every_nth])
        return col
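    # An illustrative sketch of `makeArray` (assuming a catalog built with the default
    # start/end/every_nth; the 2-d input here is hypothetical):
    #
    #     >>> cat.makeArray([[1, 2], [3, 4]], 'k')   # warns, then reshapes to 1-d
    #     array([1., 2., 3., 4.])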
    def checkForNaN(self, col, col_str):
        """Check if the column has any NaNs.  If so, set those rows to have w[k]=0.

        Parameters:
            col (array):    The input column to check.
            col_str (str):  The name of the column.  Used only as information in logging
                            output.
        """
        if col is not None and np.any(np.isnan(col)):
            index = np.where(np.isnan(col))[0]
            s = 's' if len(index) > 1 else ''
            self.logger.warning("Warning: %d NaN%s found in %s column.",len(index),s,col_str)
            if len(index) < 20:
                self.logger.info("Skipping row%s %s.",s,index.tolist())
            else:
                self.logger.info("Skipping rows starting %s",
                                 str(index[:10].tolist()).replace(']',' ...]'))
            if self._w is None:
                self._w = np.ones_like(col, dtype=float)
            self._w[index] = 0
            col[index] = 0  # Don't leave the nans there.
    def _check_file(self, file_name, reader, num=0, is_rand=False):
        # Just check the consistency of the various column numbers so we can fail fast.

        # Get the column names
        x_col = get_from_list(self.config,'x_col',num,str,'0')
        y_col = get_from_list(self.config,'y_col',num,str,'0')
        z_col = get_from_list(self.config,'z_col',num,str,'0')
        ra_col = get_from_list(self.config,'ra_col',num,str,'0')
        dec_col = get_from_list(self.config,'dec_col',num,str,'0')
        r_col = get_from_list(self.config,'r_col',num,str,'0')
        w_col = get_from_list(self.config,'w_col',num,str,'0')
        wpos_col = get_from_list(self.config,'wpos_col',num,str,'0')
        flag_col = get_from_list(self.config,'flag_col',num,str,'0')
        k_col = get_from_list(self.config,'k_col',num,str,'0')
        z1_col = get_from_list(self.config,'z1_col',num,str,'0')
        z2_col = get_from_list(self.config,'z2_col',num,str,'0')
        v1_col = get_from_list(self.config,'v1_col',num,str,'0')
        v2_col = get_from_list(self.config,'v2_col',num,str,'0')
        g1_col = get_from_list(self.config,'g1_col',num,str,'0')
        g2_col = get_from_list(self.config,'g2_col',num,str,'0')
        t1_col = get_from_list(self.config,'t1_col',num,str,'0')
        t2_col = get_from_list(self.config,'t2_col',num,str,'0')
        q1_col = get_from_list(self.config,'q1_col',num,str,'0')
        q2_col = get_from_list(self.config,'q2_col',num,str,'0')
        patch_col = get_from_list(self.config,'patch_col',num,str,'0')
        allow_xyz = self.config.get('allow_xyz', False)

        x_eval = get_from_list(self.config,'x_eval',num,str,None)
        y_eval = get_from_list(self.config,'y_eval',num,str,None)
        z_eval = get_from_list(self.config,'z_eval',num,str,None)
        ra_eval = get_from_list(self.config,'ra_eval',num,str,None)
        dec_eval = get_from_list(self.config,'dec_eval',num,str,None)
        r_eval = get_from_list(self.config,'r_eval',num,str,None)
        w_eval = get_from_list(self.config,'w_eval',num,str,None)
        wpos_eval = get_from_list(self.config,'wpos_eval',num,str,None)
        flag_eval = get_from_list(self.config,'flag_eval',num,str,None)
        k_eval = get_from_list(self.config,'k_eval',num,str,None)
        z1_eval = get_from_list(self.config,'z1_eval',num,str,None)
        z2_eval = get_from_list(self.config,'z2_eval',num,str,None)
        v1_eval = get_from_list(self.config,'v1_eval',num,str,None)
        v2_eval = get_from_list(self.config,'v2_eval',num,str,None)
        g1_eval = get_from_list(self.config,'g1_eval',num,str,None)
        g2_eval = get_from_list(self.config,'g2_eval',num,str,None)
        t1_eval = get_from_list(self.config,'t1_eval',num,str,None)
        t2_eval = get_from_list(self.config,'t2_eval',num,str,None)
        q1_eval = get_from_list(self.config,'q1_eval',num,str,None)
        q2_eval = get_from_list(self.config,'q2_eval',num,str,None)

        if x_col != '0' or y_col != '0' or x_eval is not None or y_eval is not None:
            if x_col == '0' and x_eval is None:
                raise ValueError("x_col missing for file %s"%file_name)
            if y_col == '0' and y_eval is None:
                raise ValueError("y_col missing for file %s"%file_name)
            if (ra_col != '0' or ra_eval is not None) and not allow_xyz:
                raise ValueError("ra_col not allowed in conjunction with x/y cols")
            if (dec_col != '0' or dec_eval is not None) and not allow_xyz:
                raise ValueError("dec_col not allowed in conjunction with x/y cols")
            if (r_col != '0' or r_eval is not None) and not allow_xyz:
                raise ValueError("r_col not allowed in conjunction with x/y cols")
        elif ra_col != '0' or dec_col != '0' or ra_eval is not None or dec_eval is not None:
            if ra_col == '0' and ra_eval is None:
                raise ValueError("ra_col missing for file %s"%file_name)
            if dec_col == '0' and dec_eval is None:
                raise ValueError("dec_col missing for file %s"%file_name)
            if (z_col != '0' or z_eval is not None) and not allow_xyz:
                raise ValueError("z_col not allowed in conjunction with ra/dec cols")
        else:
            raise ValueError("No valid position columns specified for file %s"%file_name)

        if k_col == '0' and k_eval is None and isKColRequired(self.orig_config,num):
            raise ValueError("k_col is missing for file %s"%file_name)
        if z1_col == '0' and z1_eval is None and isZColRequired(self.orig_config,num):
            raise ValueError("z1_col is missing for file %s"%file_name)
        if z2_col == '0' and z2_eval is None and isZColRequired(self.orig_config,num):
            raise ValueError("z2_col is missing for file %s"%file_name)
        if v1_col == '0' and v1_eval is None and isVColRequired(self.orig_config,num):
            raise ValueError("v1_col is missing for file %s"%file_name)
        if v2_col == '0' and v2_eval is None and isVColRequired(self.orig_config,num):
            raise ValueError("v2_col is missing for file %s"%file_name)
        if g1_col == '0' and g1_eval is None and isGColRequired(self.orig_config,num):
            raise ValueError("g1_col is missing for file %s"%file_name)
        if g2_col == '0' and g2_eval is None and isGColRequired(self.orig_config,num):
            raise ValueError("g2_col is missing for file %s"%file_name)
        if t1_col == '0' and t1_eval is None and isTColRequired(self.orig_config,num):
            raise ValueError("t1_col is missing for file %s"%file_name)
        if t2_col == '0' and t2_eval is None and isTColRequired(self.orig_config,num):
            raise ValueError("t2_col is missing for file %s"%file_name)
        if q1_col == '0' and q1_eval is None and isQColRequired(self.orig_config,num):
            raise ValueError("q1_col is missing for file %s"%file_name)
        if q2_col == '0' and q2_eval is None and isQColRequired(self.orig_config,num):
            raise ValueError("q2_col is missing for file %s"%file_name)

        # Either both should be 0 or both != 0.
        if (z1_col == '0' and z1_eval is None) != (z2_col == '0' and z2_eval is None):
            raise ValueError("z1_col, z2_col=(%s, %s) are invalid for file %s"%(
                             z1_col,z2_col,file_name))
        if (v1_col == '0' and v1_eval is None) != (v2_col == '0' and v2_eval is None):
            raise ValueError("v1_col, v2_col=(%s, %s) are invalid for file %s"%(
                             v1_col,v2_col,file_name))
        if (g1_col == '0' and g1_eval is None) != (g2_col == '0' and g2_eval is None):
            raise ValueError("g1_col, g2_col=(%s, %s) are invalid for file %s"%(
                             g1_col,g2_col,file_name))
        if (t1_col == '0' and t1_eval is None) != (t2_col == '0' and t2_eval is None):
            raise ValueError("t1_col, t2_col=(%s, %s) are invalid for file %s"%(
                             t1_col,t2_col,file_name))
        if (q1_col == '0' and q1_eval is None) != (q2_col == '0' and q2_eval is None):
            raise ValueError("q1_col, q2_col=(%s, %s) are invalid for file %s"%(
                             q1_col,q2_col,file_name))

        # This opens the file enough to read things inside.  The full read doesn't happen here.
        with reader:
            # Get the vanilla "ext" parameter.
            ext = get_from_list(self.config, 'ext', num, str, reader.default_ext)

            # Technically, this doesn't catch all possible errors.  If someone specifies
            # an invalid flag_ext or something, then they'll get the fitsio error message.
            # But this should probably catch the majority of error cases.
            # There are more possible errors using the eval options, but that's to be expected.
            # Those will usually raise something appropriately descriptive later.
            reader.check_valid_ext(ext)

            if x_col != '0' and y_col != '0':
                x_ext = get_from_list(self.config, 'x_ext', num, str, ext)
                y_ext = get_from_list(self.config, 'y_ext', num, str, ext)
                if x_col not in reader.names(ext=x_ext):
                    raise ValueError("x_col=%s is invalid for file %s"%(x_col,file_name))
                if y_col not in reader.names(ext=y_ext):
                    raise ValueError("y_col=%s is invalid for file %s"%(y_col,file_name))
                if z_col != '0':
                    z_ext = get_from_list(self.config, 'z_ext', num, str, ext)
                    if z_col not in reader.names(ext=z_ext):
                        raise ValueError("z_col=%s is invalid for file %s"%(z_col,file_name))
            elif ra_col != '0' and dec_col != '0':
                ra_ext = get_from_list(self.config, 'ra_ext', num, str, ext)
                dec_ext = get_from_list(self.config, 'dec_ext', num, str, ext)
                if ra_col not in reader.names(ext=ra_ext):
                    raise ValueError("ra_col=%s is invalid for file %s"%(ra_col,file_name))
                if dec_col not in reader.names(ext=dec_ext):
                    raise ValueError("dec_col=%s is invalid for file %s"%(dec_col,file_name))
                if r_col != '0':
                    r_ext = get_from_list(self.config, 'r_ext', num, str, ext)
                    if r_col not in reader.names(ext=r_ext):
                        raise ValueError("r_col=%s is invalid for file %s"%(r_col,file_name))

            if w_col != '0':
                w_ext = get_from_list(self.config, 'w_ext', num, str, ext)
                if w_col not in reader.names(ext=w_ext):
                    raise ValueError("w_col=%s is invalid for file %s"%(w_col,file_name))
            if wpos_col != '0':
                wpos_ext = get_from_list(self.config, 'wpos_ext', num, str, ext)
                if wpos_col not in reader.names(ext=wpos_ext):
                    raise ValueError("wpos_col=%s is invalid for file %s"%(wpos_col,file_name))
            if flag_col != '0':
                flag_ext = get_from_list(self.config, 'flag_ext', num, str, ext)
                if flag_col not in reader.names(ext=flag_ext):
                    raise ValueError("flag_col=%s is invalid for file %s"%(flag_col,file_name))
            if patch_col != '0':
                patch_ext = get_from_list(self.config, 'patch_ext', num, str, ext)
                if patch_col not in reader.names(ext=patch_ext):
                    raise ValueError("patch_col=%s is invalid for file %s"%(patch_col,file_name))

            if is_rand: return

            if k_col != '0':
                k_ext = get_from_list(self.config, 'k_ext', num, str, ext)
                if k_col not in reader.names(ext=k_ext):
                    if isKColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError("k_col=%s is invalid for file %s"%(k_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping k_col=%s for %s, num=%d "%(
                                k_col,file_name,num) +
                            "because it is invalid, but unneeded.")
            if z1_col != '0':
                z1_ext = get_from_list(self.config, 'z1_ext', num, str, ext)
                z2_ext = get_from_list(self.config, 'z2_ext', num, str, ext)
                if (z1_col not in reader.names(ext=z1_ext)
                        or z2_col not in reader.names(ext=z2_ext)):
                    if isZColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError(
                            "z1_col, z2_col=(%s, %s) are invalid for file %s"%(
                                z1_col,z2_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping z1_col, z2_col=(%s, %s) for %s, num=%d "%(
                                z1_col,z2_col,file_name,num) +
                            "because they are invalid, but unneeded.")
            if v1_col != '0':
                v1_ext = get_from_list(self.config, 'v1_ext', num, str, ext)
                v2_ext = get_from_list(self.config, 'v2_ext', num, str, ext)
                if (v1_col not in reader.names(ext=v1_ext)
                        or v2_col not in reader.names(ext=v2_ext)):
                    if isVColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError(
                            "v1_col, v2_col=(%s, %s) are invalid for file %s"%(
                                v1_col,v2_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping v1_col, v2_col=(%s, %s) for %s, num=%d "%(
                                v1_col,v2_col,file_name,num) +
                            "because they are invalid, but unneeded.")
            if g1_col != '0':
                g1_ext = get_from_list(self.config, 'g1_ext', num, str, ext)
                g2_ext = get_from_list(self.config, 'g2_ext', num, str, ext)
                if (g1_col not in reader.names(ext=g1_ext)
                        or g2_col not in reader.names(ext=g2_ext)):
                    if isGColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError(
                            "g1_col, g2_col=(%s, %s) are invalid for file %s"%(
                                g1_col,g2_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping g1_col, g2_col=(%s, %s) for %s, num=%d "%(
                                g1_col,g2_col,file_name,num) +
                            "because they are invalid, but unneeded.")
            if t1_col != '0':
                t1_ext = get_from_list(self.config, 't1_ext', num, str, ext)
                t2_ext = get_from_list(self.config, 't2_ext', num, str, ext)
                if (t1_col not in reader.names(ext=t1_ext)
                        or t2_col not in reader.names(ext=t2_ext)):
                    if isTColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError(
                            "t1_col, t2_col=(%s, %s) are invalid for file %s"%(
                                t1_col,t2_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping t1_col, t2_col=(%s, %s) for %s, num=%d "%(
                                t1_col,t2_col,file_name,num) +
                            "because they are invalid, but unneeded.")
            if q1_col != '0':
                q1_ext = get_from_list(self.config, 'q1_ext', num, str, ext)
                q2_ext = get_from_list(self.config, 'q2_ext', num, str, ext)
                if (q1_col not in reader.names(ext=q1_ext)
                        or q2_col not in reader.names(ext=q2_ext)):
                    if isQColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                        raise ValueError(
                            "q1_col, q2_col=(%s, %s) are invalid for file %s"%(
                                q1_col,q2_col,file_name))
                    else:
                        self.logger.warning(
                            "Warning: skipping q1_col, q2_col=(%s, %s) for %s, num=%d "%(
                                q1_col,q2_col,file_name,num) +
                            "because they are invalid, but unneeded.")

    def _pandas_warning(self):
        if Catalog._emitted_pandas_warning:
            return
        self.logger.warning(
            "Unable to import pandas.  Using np.genfromtxt instead.\n" +
            "Installing pandas is recommended for increased speed when " +
            "reading ASCII catalogs.")
        Catalog._emitted_pandas_warning = True

    def _read_file(self, file_name, reader, num, is_rand):
        # Some helper functions for reading and setting individual values.
        def math_eval(s, data):
            gdict = globals().copy()
            for mod in Catalog.eval_modules:
                exec('import ' + mod, gdict)
            gdict['data'] = data
            for k in data:
                if k in s:
                    exec(f"{k} = data['{k}']", gdict)
            return eval(s, gdict)

        def parse_value(data, x_col, x_eval, dtype=float):
            # Here x is any value name, not specifically x.  It's just a convenient letter.
            if x_eval is not None:
                x = math_eval(x_eval, data)
            else:
                x = data[x_col]
            return x.astype(dtype)

        def set_pos(data, x_col, y_col, z_col, ra_col, dec_col, r_col,
                    x_eval, y_eval, z_eval, ra_eval, dec_eval, r_eval):
            if x_col != '0' or x_eval is not None:
                self._x = parse_value(data, x_col, x_eval)
                self.logger.debug('read x')
                self._y = parse_value(data, y_col, y_eval)
                self.logger.debug('read y')
                if z_col != '0' or z_eval is not None:
                    self._z = parse_value(data, z_col, z_eval)
                    self.logger.debug('read z')
                self._apply_xyz_units()
            if ra_col != '0' or ra_eval is not None:
                self._ra = parse_value(data, ra_col, ra_eval)
                self.logger.debug('read ra')
                self._dec = parse_value(data, dec_col, dec_eval)
                self.logger.debug('read dec')
                if r_col != '0' or r_eval is not None:
                    self._r = parse_value(data, r_col, r_eval)
                    self.logger.debug('read r')
                self._apply_radec_units()

        def set_patch(data, patch_col):
            if patch_col != '0' and patch_col in data:
                self._patch = data[patch_col].astype(int)
                self.logger.debug('read patch')
                self._set_npatch()

        # Get the column names
        x_col = get_from_list(self.config,'x_col',num,str,'0')
        y_col = get_from_list(self.config,'y_col',num,str,'0')
        z_col = get_from_list(self.config,'z_col',num,str,'0')
        ra_col = get_from_list(self.config,'ra_col',num,str,'0')
        dec_col = get_from_list(self.config,'dec_col',num,str,'0')
        r_col = get_from_list(self.config,'r_col',num,str,'0')
        w_col = get_from_list(self.config,'w_col',num,str,'0')
        wpos_col = get_from_list(self.config,'wpos_col',num,str,'0')
        flag_col = get_from_list(self.config,'flag_col',num,str,'0')
        k_col = get_from_list(self.config,'k_col',num,str,'0')
        z1_col = get_from_list(self.config,'z1_col',num,str,'0')
        z2_col = get_from_list(self.config,'z2_col',num,str,'0')
        v1_col = get_from_list(self.config,'v1_col',num,str,'0')
        v2_col = get_from_list(self.config,'v2_col',num,str,'0')
        g1_col = get_from_list(self.config,'g1_col',num,str,'0')
        g2_col = get_from_list(self.config,'g2_col',num,str,'0')
        t1_col = get_from_list(self.config,'t1_col',num,str,'0')
        t2_col = get_from_list(self.config,'t2_col',num,str,'0')
        q1_col = get_from_list(self.config,'q1_col',num,str,'0')
        q2_col = get_from_list(self.config,'q2_col',num,str,'0')
        patch_col = get_from_list(self.config,'patch_col',num,str,'0')

        x_eval = get_from_list(self.config,'x_eval',num,str,None)
        y_eval = get_from_list(self.config,'y_eval',num,str,None)
        z_eval = get_from_list(self.config,'z_eval',num,str,None)
        ra_eval = get_from_list(self.config,'ra_eval',num,str,None)
        dec_eval = get_from_list(self.config,'dec_eval',num,str,None)
        r_eval = get_from_list(self.config,'r_eval',num,str,None)
        w_eval = get_from_list(self.config,'w_eval',num,str,None)
        wpos_eval = get_from_list(self.config,'wpos_eval',num,str,None)
        flag_eval = get_from_list(self.config,'flag_eval',num,str,None)
        k_eval = get_from_list(self.config,'k_eval',num,str,None)
        z1_eval = get_from_list(self.config,'z1_eval',num,str,None)
        z2_eval = get_from_list(self.config,'z2_eval',num,str,None)
        v1_eval = get_from_list(self.config,'v1_eval',num,str,None)
        v2_eval = get_from_list(self.config,'v2_eval',num,str,None)
        g1_eval = get_from_list(self.config,'g1_eval',num,str,None)
        g2_eval = get_from_list(self.config,'g2_eval',num,str,None)
        t1_eval = get_from_list(self.config,'t1_eval',num,str,None)
        t2_eval = get_from_list(self.config,'t2_eval',num,str,None)
        q1_eval = get_from_list(self.config,'q1_eval',num,str,None)
        q2_eval = get_from_list(self.config,'q2_eval',num,str,None)
        extra_cols = get(self.config,'extra_cols',list,[])

        with reader:
            ext = get_from_list(self.config, 'ext', num, str, reader.default_ext)

            # Figure out what slice to use.  If all rows, then None is faster,
            # otherwise give the range explicitly.
            if self.start == 0 and self.end is None and self.every_nth == 1:
                s = slice(None)
            else:
                s = slice(self.start, self.end, self.every_nth)

            all_cols = [x_col, y_col, z_col,
                        ra_col, dec_col, r_col,
                        patch_col,
                        w_col, wpos_col, flag_col,
                        k_col, z1_col, z2_col, v1_col, v2_col,
                        g1_col, g2_col, t1_col, t2_col, q1_col, q2_col]

            # It's faster in FITS to read in all the columns in one read, rather than
            # individually.  Typically (very close to always!), all the columns are in the
            # same extension.  Thus, the following would normally work fine.
            #     use_cols = [c for c in all_cols if c != '0']
            #     data = fits[ext][use_cols][:]
            # However, we allow the option to have different columns read from different
            # extensions.  So this is slightly more complicated.
            x_ext = get_from_list(self.config, 'x_ext', num, str, ext)
            y_ext = get_from_list(self.config, 'y_ext', num, str, ext)
            z_ext = get_from_list(self.config, 'z_ext', num, str, ext)
            ra_ext = get_from_list(self.config, 'ra_ext', num, str, ext)
            dec_ext = get_from_list(self.config, 'dec_ext', num, str, ext)
            r_ext = get_from_list(self.config, 'r_ext', num, str, ext)
            patch_ext = get_from_list(self.config, 'patch_ext', num, str, ext)
            w_ext = get_from_list(self.config, 'w_ext', num, str, ext)
            wpos_ext = get_from_list(self.config, 'wpos_ext', num, str, ext)
            flag_ext = get_from_list(self.config, 'flag_ext', num, str, ext)
            k_ext = get_from_list(self.config, 'k_ext', num, str, ext)
            z1_ext = get_from_list(self.config, 'z1_ext', num, str, ext)
            z2_ext = get_from_list(self.config, 'z2_ext', num, str, ext)
            v1_ext = get_from_list(self.config, 'v1_ext', num, str, ext)
            v2_ext = get_from_list(self.config, 'v2_ext', num, str, ext)
            g1_ext = get_from_list(self.config, 'g1_ext', num, str, ext)
            g2_ext = get_from_list(self.config, 'g2_ext', num, str, ext)
            t1_ext = get_from_list(self.config, 't1_ext', num, str, ext)
            t2_ext = get_from_list(self.config, 't2_ext', num, str, ext)
            q1_ext = get_from_list(self.config, 'q1_ext', num, str, ext)
            q2_ext = get_from_list(self.config, 'q2_ext', num, str, ext)
            all_exts = [x_ext, y_ext, z_ext,
                        ra_ext, dec_ext, r_ext,
                        patch_ext,
                        w_ext, wpos_ext, flag_ext,
                        k_ext, z1_ext, z2_ext, v1_ext, v2_ext,
                        g1_ext, g2_ext, t1_ext, t2_ext, q1_ext, q2_ext]
            col_by_ext = dict(zip(all_cols,all_exts))
            all_exts = set(all_exts + [ext])
            all_cols = [c for c in all_cols if c != '0']

            data = {}
            # Also, if we are only reading in one patch, we should adjust s before doing this.
            if self._single_patch is not None:
                if patch_col != '0':
                    data[patch_col] = reader.read(patch_col, s, ext=patch_ext)
                    all_cols.remove(patch_col)
                    set_patch(data, patch_col)
                elif self._centers is not None:
                    pos_cols = [x_col, y_col, z_col, ra_col, dec_col, r_col]
                    pos_cols = [c for c in pos_cols if c != '0']
                    for c in pos_cols:
                        all_cols.remove(c)
                    for ext in all_exts:
                        use_cols1 = [c for c in pos_cols if col_by_ext[c] == ext]
                        use_cols1 += [c for c in extra_cols if c in reader.names(ext=ext)]
                        data1 = reader.read(use_cols1, s, ext=ext)
                        for c in use_cols1:
                            data[c] = data1[c]
                    set_pos(data, x_col, y_col, z_col, ra_col, dec_col, r_col,
                            x_eval, y_eval, z_eval, ra_eval, dec_eval, r_eval)
                    x_col = y_col = z_col = ra_col = dec_col = r_col = '0'
                    x_eval = y_eval = z_eval = ra_eval = dec_eval = r_eval = None
                use = self._get_patch_index(self._single_patch)
                self.select(use)
                if s == slice(None):
                    s = use
                else:
                    end1 = np.max(use)*s.step+s.start+1
                    s = np.arange(s.start, end1, s.step)[use]
                self._patch = None
                data = {}  # Start fresh, since the ones we used so far are done.

                # We might actually be done now, in which case, just return.
                # (Else the fits read below won't actually work.)
                if len(all_cols) == 0 or (isinstance(s,np.ndarray) and len(s) == 0):
                    return

            # Now read the rest using the updated s
            for ext in all_exts:
                use_cols1 = [c for c in all_cols
                             if col_by_ext[c] == ext and c in reader.names(ext=ext)]
                use_cols1 += [c for c in extra_cols if c in reader.names(ext=ext)]
                if len(use_cols1) == 0:
                    continue
                data1 = reader.read(use_cols1, s, ext=ext)
                for c in use_cols1:
                    data[c] = data1[c]

            # Set position values
            set_pos(data, x_col, y_col, z_col, ra_col, dec_col, r_col,
                    x_eval, y_eval, z_eval, ra_eval, dec_eval, r_eval)

            # Set patch
            set_patch(data, patch_col)

            # Set w
            if w_col != '0' or w_eval is not None:
                self._w = parse_value(data, w_col, w_eval)
                self.logger.debug('read w')

            # Set wpos
            if wpos_col != '0' or wpos_eval is not None:
                self._wpos = parse_value(data, wpos_col, wpos_eval)
                self.logger.debug('read wpos')

            # Set flag
            if flag_col != '0' or flag_eval is not None:
                self._flag = parse_value(data, flag_col, flag_eval, int)
                self.logger.debug('read flag')

            # Skip k,z,v,g,t,q if this file is a random catalog.
            if not is_rand:
                # Set k
                if k_col in reader.names(ext=k_ext) or k_eval is not None:
                    self._k = parse_value(data, k_col, k_eval)
                    self.logger.debug('read k')
                # Set z1,z2
                if z1_col in reader.names(ext=z1_ext) or z1_eval is not None:
                    self._z1 = parse_value(data, z1_col, z1_eval)
                    self.logger.debug('read z1')
                    self._z2 = parse_value(data, z2_col, z2_eval)
                    self.logger.debug('read z2')
                # Set v1,v2
                if v1_col in reader.names(ext=v1_ext) or v1_eval is not None:
                    self._v1 = parse_value(data, v1_col, v1_eval)
                    self.logger.debug('read v1')
                    self._v2 = parse_value(data, v2_col, v2_eval)
                    self.logger.debug('read v2')
                # Set g1,g2
                if g1_col in reader.names(ext=g1_ext) or g1_eval is not None:
                    self._g1 = parse_value(data, g1_col, g1_eval)
                    self.logger.debug('read g1')
                    self._g2 = parse_value(data, g2_col, g2_eval)
                    self.logger.debug('read g2')
                # Set t1,t2
                if t1_col in reader.names(ext=t1_ext) or t1_eval is not None:
                    self._t1 = parse_value(data, t1_col, t1_eval)
                    self.logger.debug('read t1')
                    self._t2 = parse_value(data, t2_col, t2_eval)
                    self.logger.debug('read t2')
                # Set q1,q2
                if q1_col in reader.names(ext=q1_ext) or q1_eval is not None:
                    self._q1 = parse_value(data, q1_col, q1_eval)
                    self.logger.debug('read q1')
                    self._q2 = parse_value(data, q2_col, q2_eval)
                    self.logger.debug('read q2')

    @property
    def nfields(self):
        if not hasattr(self, '_nfields'):
            # Make simple functions that call NField, etc. with self as the first argument.
            # Note: LRU_Cache keys on the args, not kwargs, so everything but logger should
            # be in args for this function.  We convert them to kwargs for the NField init
            # call.
            def get_nfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return NField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            # Now wrap these in LRU_Caches with (initially) just 1 element being cached.
            self._nfields = LRU_Cache(get_nfield, 1)
        return self._nfields

    @property
    def kfields(self):
        if not hasattr(self, '_kfields'):
            def get_kfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return KField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._kfields = LRU_Cache(get_kfield, 1)
        return self._kfields

    @property
    def zfields(self):
        if not hasattr(self, '_zfields'):
            def get_zfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return ZField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._zfields = LRU_Cache(get_zfield, 1)
        return self._zfields

    @property
    def vfields(self):
        if not hasattr(self, '_vfields'):
            def get_vfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return VField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._vfields = LRU_Cache(get_vfield, 1)
        return self._vfields

    @property
    def gfields(self):
        if not hasattr(self, '_gfields'):
            def get_gfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return GField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._gfields = LRU_Cache(get_gfield, 1)
        return self._gfields

    @property
    def tfields(self):
        if not hasattr(self, '_tfields'):
            def get_tfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return TField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._tfields = LRU_Cache(get_tfield, 1)
        return self._tfields

    @property
    def qfields(self):
        if not hasattr(self, '_qfields'):
            def get_qfield(min_size, max_size, split_method, brute, min_top, max_top, coords,
                           rng, logger=None):
                return QField(self, min_size=min_size, max_size=max_size,
                              split_method=split_method, brute=brute,
                              min_top=min_top, max_top=max_top, coords=coords,
                              rng=rng, logger=logger)
            self._qfields = LRU_Cache(get_qfield, 1)
        return self._qfields
    def resize_cache(self, maxsize):
        """Resize all field caches.

        The various kinds of fields built from this catalog are cached.  This may or may
        not be an optimization for your use case.  Normally only a single field is built
        for a given catalog, and it is usually efficient to cache it, so it can be reused
        multiple times.  E.g. for the usual Landy-Szalay NN calculation:

            >>> dd.process(data_cat)
            >>> rr.process(rand_cat)
            >>> dr.process(data_cat, rand_cat)

        the third line will be able to reuse the same fields built for the data and randoms
        in the first two lines.

        However, if you are making many different fields from the same catalog -- for
        instance because you keep changing the min_sep and max_sep for different calls --
        then saving them all will tend to blow up the memory.

        Therefore, the default number of fields (of each type) to cache is 1.  This lets the
        first use case be efficient, but not use too much memory for the latter case.

        If you prefer a different behavior, this method lets you change the number of fields
        to cache.  The cache is an LRU (Least Recently Used) cache, which means only the n
        most recently used fields are saved.  I.e. when it is full, the least recently used
        field is removed from the cache.

        If you call this with maxsize=0, then caching will be turned off.  A new field will
        be built each time you call a process function with this catalog.

        If you call this with maxsize>1, then multiple fields will be saved according to
        whatever number you set.  This will use more memory, but may be an optimization for
        you depending on what you are doing.

        Finally, if you want to set different sizes for the different kinds of fields, then
        you can call resize separately for the different caches:

            >>> cat.nfields.resize(maxsize)
            >>> cat.kfields.resize(maxsize)
            >>> cat.zfields.resize(maxsize)
            >>> cat.vfields.resize(maxsize)
            >>> cat.gfields.resize(maxsize)
            >>> cat.tfields.resize(maxsize)
            >>> cat.qfields.resize(maxsize)

        Parameters:
            maxsize (int):  The new maximum number of fields of each type to cache.
        """
        if hasattr(self, '_nfields'): self.nfields.resize(maxsize)
        if hasattr(self, '_kfields'): self.kfields.resize(maxsize)
        if hasattr(self, '_zfields'): self.zfields.resize(maxsize)
        if hasattr(self, '_vfields'): self.vfields.resize(maxsize)
        if hasattr(self, '_gfields'): self.gfields.resize(maxsize)
        if hasattr(self, '_tfields'): self.tfields.resize(maxsize)
        if hasattr(self, '_qfields'): self.qfields.resize(maxsize)
    def clear_cache(self):
        """Clear all field caches.

        The various kinds of fields built from this catalog are cached.  This may or may
        not be an optimization for your use case.  Normally only a single field is built
        for a given catalog, and it is usually efficient to cache it, so it can be reused
        multiple times.  E.g. for the usual Landy-Szalay NN calculation:

            >>> dd.process(data_cat)
            >>> rr.process(rand_cat)
            >>> dr.process(data_cat, rand_cat)

        the third line will be able to reuse the same fields built for the data and randoms
        in the first two lines.

        However, this also means that the memory used for the field will persist as long as
        the catalog object does.  If you need to recover this memory and don't want to delete
        the catalog yet, this method lets you clear the cache.

        There are separate caches for each kind of field.  If you want to clear just one or
        some of them, you can call clear separately for the different caches:

            >>> cat.nfields.clear()
            >>> cat.kfields.clear()
            >>> cat.zfields.clear()
            >>> cat.vfields.clear()
            >>> cat.gfields.clear()
            >>> cat.tfields.clear()
            >>> cat.qfields.clear()
        """
        if hasattr(self, '_nfields'): self.nfields.clear()
        if hasattr(self, '_kfields'): self.kfields.clear()
        if hasattr(self, '_zfields'): self.zfields.clear()
        if hasattr(self, '_vfields'): self.vfields.clear()
        if hasattr(self, '_gfields'): self.gfields.clear()
        if hasattr(self, '_tfields'): self.tfields.clear()
        if hasattr(self, '_qfields'): self.qfields.clear()
        self._field = lambda : None  # Acts like a dead weakref
    @property
    def field(self):
        # The default is to return None here.
        # This might also return None if the weakref has expired.
        # But if the weakref is alive, this returns the field we want.
        return self._field()
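    # E.g. (a hedged sketch, not from the original source): `field` holds only a weak
    # reference, so it is non-None only while the most recently built field is still
    # alive elsewhere:
    #
    #     >>> nfield = cat.getNField()
    #     >>> cat.field is nfield
    #     True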
    def getNField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return an `NField` based on the positions in this catalog.

        The `NField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            An `NField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if logger is None:
            logger = self.logger
        field = self.nfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
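    # A minimal sketch of building an NField directly (the min_size/max_size values are
    # arbitrary illustrations, not defaults from the original source):
    #
    #     >>> field = cat.getNField(min_size=0.01, max_size=10., split_method='median')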
    def getKField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `KField` based on the k values in this catalog.

        The `KField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `KField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.k is None:
            raise TypeError("k is not defined.")
        if logger is None:
            logger = self.logger
        field = self.kfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
    def getZField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `ZField` based on the z1,z2 values in this catalog.

        The `ZField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `ZField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.z1 is None or self.z2 is None:
            raise TypeError("z1,z2 are not defined.")
        if logger is None:
            logger = self.logger
        field = self.zfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
    def getVField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `VField` based on the v1,v2 values in this catalog.

        The `VField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `VField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.v1 is None or self.v2 is None:
            raise TypeError("v1,v2 are not defined.")
        if logger is None:
            logger = self.logger
        field = self.vfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
    def getGField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `GField` based on the g1,g2 values in this catalog.

        The `GField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `GField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.g1 is None or self.g2 is None:
            raise TypeError("g1,g2 are not defined.")
        if logger is None:
            logger = self.logger
        field = self.gfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
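    # E.g. (illustrative only, assuming the catalog was built with g1, g2 arrays):
    #
    #     >>> gfield = cat.getGField(brute=True)   # force brute-force traversal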
    def getTField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `TField` based on the t1,t2 values in this catalog.

        The `TField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `TField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.t1 is None or self.t2 is None:
            raise TypeError("t1,t2 are not defined.")
        if logger is None:
            logger = self.logger
        field = self.tfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
    def getQField(self, *, min_size=0, max_size=None, split_method=None, brute=False,
                  min_top=None, max_top=10, coords=None, logger=None):
        """Return a `QField` based on the q1,q2 values in this catalog.

        The `QField` object is cached, so this is efficient to call multiple times.
        cf. `resize_cache` and `clear_cache`.

        Parameters:
            min_size (float):   The minimum radius cell required (usually min_sep).
                                (default: 0)
            max_size (float):   The maximum radius cell required (usually max_sep).
                                (default: None)
            split_method (str): Which split method to use ('mean', 'median', 'middle', or
                                'random').  (default: 'mean'; this value can also be given
                                in the Catalog constructor in the config dict.)
            brute (bool):       Whether to force traversal to the leaves.  (default: False)
            min_top (int):      The minimum number of top layers to use when setting up the
                                field.  (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`)
            max_top (int):      The maximum number of top layers to use when setting up the
                                field.  (default: 10)
            coords (str):       The kind of coordinate system to use.  (default: self.coords)
            logger:             A Logger object if desired.  (default: self.logger)

        Returns:
            A `QField` object
        """
        if split_method is None:
            split_method = get(self.config,'split_method',str,'mean')
        if self.q1 is None or self.q2 is None:
            raise TypeError("q1,q2 are not defined.")
        if logger is None:
            logger = self.logger
        field = self.qfields(min_size, max_size, split_method, brute, min_top, max_top,
                             coords, rng=self._rng, logger=logger)
        self._field = weakref.ref(field)
        return field
    def _weighted_mean(self, x, idx=None):
        # Find the weighted mean of some column.
        # If weights are set, then return sum(w * x) / sum(w).
        # Else, just sum(x) / N.
        if self.nontrivial_w:
            if idx is None:
                return np.sum(x * self.w) / self.sumw
            else:
                return np.sum(x[idx] * self.w[idx]) / np.sum(self.w[idx])
        else:
            # Note: if idx is None, x[None] just adds a new axis, so the mean is the
            # same as np.mean(x).
            return np.mean(x[idx])
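    # E.g. with w = [1, 3] and x = [2.0, 4.0] (toy numbers for illustration), the
    # weighted mean is (1*2.0 + 3*4.0) / (1+3) = 3.5.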
    def get_patch_centers(self):
        """Return an array of patch centers corresponding to the patches in this catalog.

        If the patches were set either using K-Means or by giving the centers, then this
        will just return that same center array.  Otherwise, it will be calculated from the
        positions of the objects with each patch number.

        This function is automatically called when accessing the property ``patch_centers``.
        So you should not normally need to call it directly.

        Returns:
            An array of center coordinates used to make the patches.
            Shape is (npatch, 2) for flat geometries or (npatch, 3) for 3d or
            spherical geometries.  In the latter case, the centers represent
            (x,y,z) coordinates on the unit sphere.
        """
        # Early exit
        if self._centers is not None:
            return self._centers
        # Distinguish between the original patch centers we got on input (self._centers)
        # and the centers we find from the data by averaging the position in each patch
        # (self._computed_centers).  There are times that we only want the former.
        if self._computed_centers is None:
            self.load()
            if self._patch is None:
                if self.coords == 'flat':
                    centers = np.array([[self._weighted_mean(self.x),
                                         self._weighted_mean(self.y)]])
                else:
                    centers = np.array([[self._weighted_mean(self.x),
                                         self._weighted_mean(self.y),
                                         self._weighted_mean(self.z)]])
            else:
                centers = np.empty((self.npatch, 2 if self.z is None else 3))
                for p in range(self.npatch):
                    indx = np.where(self.patch == p)[0]
                    if len(indx) == 0:
                        raise RuntimeError("Cannot find center for patch %s."%p +
                                           "  No items with this patch number")
                    if self.coords == 'flat':
                        centers[p] = [self._weighted_mean(self.x,indx),
                                      self._weighted_mean(self.y,indx)]
                    else:
                        centers[p] = [self._weighted_mean(self.x,indx),
                                      self._weighted_mean(self.y,indx),
                                      self._weighted_mean(self.z,indx)]
            if self.coords == 'spherical':
                centers /= np.sqrt(np.sum(centers**2,axis=1))[:,np.newaxis]
            self._computed_centers = centers
        return self._computed_centers
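    # A hedged sketch of the returned shape (assuming a catalog that has patches):
    #
    #     >>> centers = cat.get_patch_centers()
    #     >>> centers.shape   # (npatch, 2) for flat, (npatch, 3) for spherical/3d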
    def write_patch_centers(self, file_name):
        """Write the patch centers to a file.

        The output file will include the following columns:

        ========    =======================================================
        Column      Description
        ========    =======================================================
        patch       patch number (0..npatch-1)
        x           mean x values
        y           mean y values
        z           mean z values (only for spherical or 3d coordinates)
        ========    =======================================================

        It will write a FITS file if the file name ends with '.fits', otherwise an ASCII file.

        Parameters:
            file_name (str):    The name of the file to write to.
        """
        self.logger.info('Writing centers to %s',file_name)

        centers = self.patch_centers
        col_names = ['patch', 'x', 'y']
        if self.coords != 'flat':
            col_names.append('z')
        columns = [np.arange(centers.shape[0])]
        for i in range(centers.shape[1]):
            columns.append(centers[:,i])

        with make_writer(file_name, precision=16, logger=self.logger) as writer:
            writer.write(col_names, columns)
    def read_patch_centers(self, file_name):
        """Read patch centers from a file.

        This function typically gets called automatically when setting patch_centers as a
        string, being the file name.  The patch centers are read from the file and returned.

        Parameters:
            file_name (str):    The name of the file to read from.

        Returns:
            The centers, as an array, which can be used to determine the patches.
        """
        self.logger.info('Reading centers from %s',file_name)

        with make_reader(file_name, logger=self.logger) as reader:
            data = reader.read_data()
        if 'z' in data.dtype.names:
            return np.column_stack((data['x'],data['y'],data['z']))
        else:
            return np.column_stack((data['x'],data['y']))
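    # A round-trip sketch of the two methods above ('centers.fits' and the x, y arrays
    # are hypothetical):
    #
    #     >>> cat.write_patch_centers('centers.fits')
    #     >>> centers = cat.read_patch_centers('centers.fits')
    #     >>> cat2 = treecorr.Catalog(x=x, y=y, patch_centers=centers)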
    def load(self):
        """Load the data from a file, if it isn't yet loaded.

        When a Catalog is read in from a file, it tries to delay the loading of the data from
        disk until it is actually needed.  This is especially important when running over a
        set of patches, since you may not be able to fit all the patches in memory at once.

        One does not normally need to call this method explicitly.  It will run automatically
        whenever the data is needed.  However, if you want to directly control when the disk
        access happens, you can use this function.
        """
        if not self.loaded:
            self.logger.info("Reading input file %s",self.name)
            self._read_file(self.file_name, self.reader, self._num, self._is_rand)
            self._finish_input()
    def unload(self):
        """Bring the Catalog back to an "unloaded" state, if possible.

        When a Catalog is read in from a file, it tries to delay the loading of the data from
        disk until it is actually needed.  After loading, this method will return the Catalog
        back to the unloaded state to recover the memory in the data arrays.  If the Catalog
        is needed again during further processing, it will re-load the data from disk at that
        time.

        This will also call `clear_cache` to recover any memory from fields that have been
        constructed as well.

        If the Catalog was not read in from a file, then this function will only do the
        `clear_cache` step.
        """
        if self.file_type is not None:
            self._x = None
            self._y = None
            self._z = None
            self._ra = None
            self._dec = None
            self._r = None
            self._w = None
            self._wpos = None
            self._k = None
            self._z1 = None
            self._z2 = None
            self._v1 = None
            self._v2 = None
            self._g1 = None
            self._g2 = None
            self._t1 = None
            self._t2 = None
            self._q1 = None
            self._q2 = None
            self._patch = None
            if self._patches is not None:
                for p in self._patches:
                    if p is not self:
                        p.unload()
        self.clear_cache()
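    # An illustrative load/unload cycle (the file and column names are hypothetical):
    #
    #     >>> cat = treecorr.Catalog('data.fits', ra_col='RA', dec_col='DEC',
    #     ...                        ra_units='deg', dec_units='deg')
    #     >>> cat.load()     # force the disk read now
    #     >>> cat.unload()   # release the arrays; they will reload on next use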
    def get_patch_file_names(self, save_patch_dir):
        """Get the names of the files to use for reading/writing patches in save_patch_dir.
        """
        if self.file_name is not None:
            base, ext = os.path.splitext(os.path.basename(self.file_name))
            # Default to a FITS file if we do not otherwise recognize the file type.
            # It's not critical to match this, just a convenience for if the user
            # wants to pre-create their own patch files.
            if ext.lower() not in [".fits", ".fit", ".hdf5", ".hdf"]:
                ext = ".fits"
            names = [base + '_%03d%s'%(i, ext) for i in range(self.npatch)]
        else:
            names = ['patch%03d.fits'%i for i in range(self.npatch)]
        return [os.path.join(save_patch_dir, n) for n in names]
    def write_patches(self, save_patch_dir=None):
        """Write the patches to disk as separate files.

        This can be used in conjunction with the ``low_mem=True`` option of `get_patches`
        (and implicitly by the various `process <Corr2.process>` methods) to only keep
        at most two patches in memory at a time.

        Parameters:
            save_patch_dir (str):   The directory to write the patches to.  [default: None,
                                    in which case self.save_patch_dir will be used.  If that
                                    is None, a ValueError will be raised.]
        """
        if save_patch_dir is None:
            save_patch_dir = self.save_patch_dir
        if save_patch_dir is None:
            raise ValueError("save_patch_dir is required here, since not given in constructor.")

        file_names = self.get_patch_file_names(save_patch_dir)
        # This next bit looks gratuitous, but without it, it's possible for self.patches to
        # end up reading the existing files rather than build the patches and overwrite them.
        for file_name in file_names:
            if os.path.exists(file_name):
                os.remove(file_name)
        for i, p, file_name in zip(range(self.npatch), self.patches, file_names):
            self.logger.info('Writing patch %d to %s',i,file_name)
            if p.ra is not None:
                # Don't multiply and divide by the units on round trip.
                p.ra_units = p.dec_units = 1
            p.write(file_name)
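    # E.g. (a hedged sketch; the directory name and input arrays are hypothetical):
    #
    #     >>> cat = treecorr.Catalog(ra=ra, dec=dec, ra_units='rad', dec_units='rad',
    #     ...                        npatch=16, save_patch_dir='patches')
    #     >>> cat.write_patches()    # writes patches/patch000.fits, etc.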
    def read_patches(self, save_patch_dir=None):
        """Read the patches from files on disk.

        This function assumes that the patches were written using `write_patches`.
        In particular, the file names are not arbitrary, but must match what TreeCorr uses
        in that method.

        .. note::

            The patches that are read in will be in an "unloaded" state.  They will load as
            needed when some functionality requires it.  So this is compatible with using
            the ``low_mem`` option in various places.

        Parameters:
            save_patch_dir (str):   The directory to read from. [default: None, in which
                                    case self.save_patch_dir will be used.  If that is None,
                                    a ValueError will be raised.]
        """
        if save_patch_dir is None:
            save_patch_dir = self.save_patch_dir
        if save_patch_dir is None:
            raise ValueError("save_patch_dir is required here, since not given in constructor.")

        # Need to be careful here not to trigger a load by accident.
        # This would be easier if we just checked e.g. whether self.ra is not None, etc.
        # But that would trigger an unnecessary load if we aren't loaded yet.
        # So do all this with the underscore attributes.
        kwargs = {}
        if self._ra is not None or self.config.get('ra_col','0') != '0':
            kwargs['ra_col'] = 'ra'
            kwargs['dec_col'] = 'dec'
            kwargs['ra_units'] = 'rad'
            kwargs['dec_units'] = 'rad'
            if self._r is not None or self.config.get('r_col','0') != '0':
                kwargs['r_col'] = 'r'
        else:
            kwargs['x_col'] = 'x'
            kwargs['y_col'] = 'y'
            if self._z is not None or self.config.get('z_col','0') != '0':
                kwargs['z_col'] = 'z'
        if (self._w is not None and self._nontrivial_w) or self.config.get('w_col','0') != '0':
            kwargs['w_col'] = 'w'
        if self._wpos is not None or self.config.get('wpos_col','0') != '0':
            kwargs['wpos_col'] = 'wpos'
        if self._k is not None or self.config.get('k_col','0') != '0':
            kwargs['k_col'] = 'k'
        if self._z1 is not None or self.config.get('z1_col','0') != '0':
            kwargs['z1_col'] = 'z1'
        if self._z2 is not None or self.config.get('z2_col','0') != '0':
            kwargs['z2_col'] = 'z2'
        if self._v1 is not None or self.config.get('v1_col','0') != '0':
            kwargs['v1_col'] = 'v1'
        if self._v2 is not None or self.config.get('v2_col','0') != '0':
            kwargs['v2_col'] = 'v2'
        if self._g1 is not None or self.config.get('g1_col','0') != '0':
            kwargs['g1_col'] = 'g1'
        if self._g2 is not None or self.config.get('g2_col','0') != '0':
            kwargs['g2_col'] = 'g2'
        if self._t1 is not None or self.config.get('t1_col','0') != '0':
            kwargs['t1_col'] = 't1'
        if self._t2 is not None or self.config.get('t2_col','0') != '0':
            kwargs['t2_col'] = 't2'
        if self._q1 is not None or self.config.get('q1_col','0') != '0':
            kwargs['q1_col'] = 'q1'
        if self._q2 is not None or self.config.get('q2_col','0') != '0':
            kwargs['q2_col'] = 'q2'

        file_names = self.get_patch_file_names(save_patch_dir)
        self._patches = []
        # Check that the files exist, although we won't actually load them yet.
        for file_name in file_names:
            if not os.path.isfile(file_name):
                raise OSError("Patch file %s not found"%file_name)
        self._patches = [Catalog(file_name=name, patch=i, npatch=self.npatch, **kwargs)
                         for i, name in enumerate(file_names)]
        self.logger.info('Patches created from files %s .. %s', file_names[0], file_names[-1])
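
    # A sketch of reading previously written patches back in (path hypothetical).
    # The patches are created in an unloaded state and will read their data lazily:
    #
    #     >>> cat.write_patches('/tmp/patches')
    #     >>> cat.read_patches('/tmp/patches')
    #     >>> patches = cat.patches             # no disk access happens yet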
    def get_patches(self, *, low_mem=False):
        """Return a list of Catalog instances each representing a single patch from this
        Catalog.

        After calling this function once, the patches may be repeatedly accessed by the
        ``patches`` attribute, without triggering a rebuild of the patches.  Furthermore, if
        ``patches`` is accessed before calling this function, it will be called automatically
        (with the default low_mem parameter).

        Parameters:
            low_mem (bool):     Whether to try to leave the returned patch catalogs in an
                                "unloaded" state, wherein they will not load the data from a
                                file until they are used.  This only works if the current
                                catalog was loaded from a file or the patches were saved
                                (using ``save_patch_dir``).  (default: False)
        """
        # Early exit
        if self._patches is not None:
            return self._patches
        if self.npatch == 1 or self._single_patch is not None:
            self._patches = [self]
            return self._patches

        # See if we have patches already written to disk.  If so, use them.
        if self.save_patch_dir is not None:
            try:
                self.read_patches()
            except OSError:
                # No problem.  We'll make them and write them out below.
                pass
            else:
                return self._patches

        if low_mem and self.file_name is not None:
            # This is a little tricky, since we don't want to trigger a load if the catalog
            # isn't loaded yet.  So try to get the patches from centers or single_patch first.
            if self._centers is not None:
                patch_set = range(len(self._centers))
            else:
                # This triggers a load of the current catalog, but no choice here.
                patch_set = sorted(set(self.patch))
            self._patches = [Catalog(config=self.config, file_name=self.file_name, patch=i,
                                     npatch=self.npatch, patch_centers=self._centers)
                             for i in patch_set]
        else:
            patch_set = sorted(set(self.patch))
            if len(patch_set) != self.npatch:
                self.logger.error("WARNING: Some patch numbers do not contain any objects!")
                missing = set(range(self.npatch)) - set(patch_set)
                self.logger.warning("The following patch numbers have no objects: %s", missing)
                self.logger.warning("This may be a problem depending on your use case.")
            self._patches = []
            for i in patch_set:
                indx = np.where(self.patch == i)[0]
                x = self.x[indx] if self.x is not None else None
                y = self.y[indx] if self.y is not None else None
                z = self.z[indx] if self.z is not None else None
                ra = self.ra[indx] if self.ra is not None else None
                dec = self.dec[indx] if self.dec is not None else None
                r = self.r[indx] if self.r is not None else None
                w = self.w[indx] if self.nontrivial_w else None
                wpos = self.wpos[indx] if self.wpos is not None else None
                k = self.k[indx] if self.k is not None else None
                z1 = self.z1[indx] if self.z1 is not None else None
                z2 = self.z2[indx] if self.z2 is not None else None
                v1 = self.v1[indx] if self.v1 is not None else None
                v2 = self.v2[indx] if self.v2 is not None else None
                g1 = self.g1[indx] if self.g1 is not None else None
                g2 = self.g2[indx] if self.g2 is not None else None
                t1 = self.t1[indx] if self.t1 is not None else None
                t2 = self.t2[indx] if self.t2 is not None else None
                q1 = self.q1[indx] if self.q1 is not None else None
                q2 = self.q2[indx] if self.q2 is not None else None
                check_wpos = self._wpos if self._wpos is not None else self._w
                kwargs = dict(keep_zero_weight=np.any(check_wpos==0))
                if self.ra is not None:
                    kwargs['ra_units'] = 'rad'
                    kwargs['dec_units'] = 'rad'
                    kwargs['allow_xyz'] = True
                p = Catalog(x=x, y=y, z=z, ra=ra, dec=dec, r=r, w=w, wpos=wpos, k=k,
                            z1=z1, z2=z2, v1=v1, v2=v2, g1=g1, g2=g2, t1=t1, t2=t2,
                            q1=q1, q2=q2, patch=i, npatch=self.npatch, **kwargs)
                self._patches.append(p)

        # Write the patches to files if requested.
        if self.save_patch_dir is not None:
            self.write_patches()
            if low_mem:
                # If low_mem, replace _patches with a version that reads from these files.
                # This will typically be a lot faster when the load does happen.
                self.read_patches()

        return self._patches
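
    # A sketch of low-memory patch processing (file and column names hypothetical):
    #
    #     >>> cat = treecorr.Catalog('cat.fits', ra_col='RA', dec_col='DEC',
    #     ...                        ra_units='deg', dec_units='deg',
    #     ...                        npatch=16, save_patch_dir='/tmp/patches')
    #     >>> patches = cat.get_patches(low_mem=True)
    #     >>> len(patches)
    #     16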
    def write(self, file_name, *, file_type=None, precision=None):
        """Write the catalog to a file.

        The position columns are output using the same units as were used when building the
        Catalog.  If you want to use a different unit, you can set the catalog's units
        directly before writing.  e.g.:

            >>> cat = treecorr.Catalog(ra=ra, dec=dec,
            ...                        ra_units='hours', dec_units='degrees')
            >>> cat.ra_units = coord.degrees
            >>> cat.write('new_cat.dat')

        The output file will include some of the following columns (those for which the
        corresponding attribute is not None):

        ========    =======================================================
        Column      Description
        ========    =======================================================
        ra          self.ra if not None
        dec         self.dec if not None
        r           self.r if not None
        x           self.x if not None
        y           self.y if not None
        z           self.z if not None
        w           self.w if not None and self.nontrivial_w
        wpos        self.wpos if not None
        k           self.k if not None
        z1          self.z1 if not None
        z2          self.z2 if not None
        v1          self.v1 if not None
        v2          self.v2 if not None
        g1          self.g1 if not None
        g2          self.g2 if not None
        t1          self.t1 if not None
        t2          self.t2 if not None
        q1          self.q1 if not None
        q2          self.q2 if not None
        patch       self.patch if not None
        ========    =======================================================

        Parameters:
            file_name (str):    The name of the file to write to.
            file_type (str):    The type of file to write ('ASCII' or 'FITS').  (default:
                                determine the type automatically from the extension of
                                file_name.)
            precision (int):    For ASCII output catalogs, the desired precision. (default:
                                16; this value can also be given in the Catalog constructor
                                in the config dict as cat_precision.)

        Returns:
            The column names that were written to the file as a list.
        """
        self.logger.info('Writing catalog to %s', file_name)

        col_names = []
        columns = []
        if self.ra is not None:
            col_names.append('ra')
            columns.append(self.ra / self.ra_units)
            col_names.append('dec')
            columns.append(self.dec / self.dec_units)
            if self.r is not None:
                col_names.append('r')
                columns.append(self.r)
        else:
            col_names.append('x')
            columns.append(self.x / self.x_units)
            col_names.append('y')
            columns.append(self.y / self.y_units)
            if self.z is not None:
                col_names.append('z')
                columns.append(self.z)
        if self.nontrivial_w:
            col_names.append('w')
            columns.append(self.w)
        if self.wpos is not None:
            col_names.append('wpos')
            columns.append(self.wpos)
        if self.k is not None:
            col_names.append('k')
            columns.append(self.k)
        if self.z1 is not None:
            col_names.append('z1')
            columns.append(self.z1)
        if self.z2 is not None:
            col_names.append('z2')
            columns.append(self.z2)
        if self.v1 is not None:
            col_names.append('v1')
            columns.append(self.v1)
        if self.v2 is not None:
            col_names.append('v2')
            columns.append(self.v2)
        if self.g1 is not None:
            col_names.append('g1')
            columns.append(self.g1)
        if self.g2 is not None:
            col_names.append('g2')
            columns.append(self.g2)
        if self.t1 is not None:
            col_names.append('t1')
            columns.append(self.t1)
        if self.t2 is not None:
            col_names.append('t2')
            columns.append(self.t2)
        if self.q1 is not None:
            col_names.append('q1')
            columns.append(self.q1)
        if self.q2 is not None:
            col_names.append('q2')
            columns.append(self.q2)
        if self._patch is not None:
            col_names.append('patch')
            columns.append(self.patch)

        if precision is None:
            precision = get(self.config, 'cat_precision', int, 16)

        writer = make_writer(file_name, precision=precision, file_type=file_type,
                             logger=self.logger)
        with writer:
            writer.write(col_names, columns)
        return col_names
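
    # A short sketch (output file names hypothetical):
    #
    #     >>> cols = cat.write('out.fits')          # file type inferred from the extension
    #     >>> cat.write('out.dat', precision=8)     # ASCII output with 8 digits of precision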
    def copy(self):
        """Make a copy"""
        return copy.deepcopy(self)
    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop('logger', None)  # Oh well.  This is just lost in the copy.  Can't be pickled.
        d.pop('_field', None)
        d.pop('_nfields', None)
        d.pop('_kfields', None)
        d.pop('_zfields', None)
        d.pop('_vfields', None)
        d.pop('_gfields', None)
        d.pop('_tfields', None)
        d.pop('_qfields', None)
        return d

    def __setstate__(self, d):
        self.__dict__ = d
        if self._logger_name is not None:  # pragma: no branch
            self.logger = setup_logger(get(self.config, 'verbose', int, 1),
                                       self.config.get('log_file', None), self._logger_name)
        self._field = lambda: None

    def __repr__(self):
        s = 'treecorr.Catalog('
        if self.loaded:
            if self.x is not None and self.ra is None: s += 'x='+repr(self.x)+','
            if self.y is not None and self.ra is None: s += 'y='+repr(self.y)+','
            if self.z is not None and self.ra is None: s += 'z='+repr(self.z)+','
            if self.ra is not None: s += 'ra='+repr(self.ra)+",ra_units='rad',"
            if self.dec is not None: s += 'dec='+repr(self.dec)+",dec_units='rad',"
            if self.r is not None: s += 'r='+repr(self.r)+','
            if self.nontrivial_w: s += 'w='+repr(self.w)+','
            if self.wpos is not None: s += 'wpos='+repr(self.wpos)+','
            if self.k is not None: s += 'k='+repr(self.k)+','
            if self.z1 is not None: s += 'z1='+repr(self.z1)+','
            if self.z2 is not None: s += 'z2='+repr(self.z2)+','
            if self.v1 is not None: s += 'v1='+repr(self.v1)+','
            if self.v2 is not None: s += 'v2='+repr(self.v2)+','
            if self.g1 is not None: s += 'g1='+repr(self.g1)+','
            if self.g2 is not None: s += 'g2='+repr(self.g2)+','
            if self.t1 is not None: s += 't1='+repr(self.t1)+','
            if self.t2 is not None: s += 't2='+repr(self.t2)+','
            if self.q1 is not None: s += 'q1='+repr(self.q1)+','
            if self.q2 is not None: s += 'q2='+repr(self.q2)+','
            if self.patch is not None: s += 'patch='+repr(self.patch)+','
            wpos = self._wpos if self._wpos is not None else self._w
            if np.any(wpos == 0): s += 'keep_zero_weight=True,'
            # Remove the last ','
            s = s[:-1] + ')'
        else:
            # Catalog isn't loaded yet.  Use the file_name info here instead.
            s += 'file_name='+repr(self.file_name)+','
            s += 'config='+repr(self.config)
            s += ')'
        return s

    def __eq__(self, other):
        return (isinstance(other, Catalog)
                and np.array_equal(self.x, other.x)
                and np.array_equal(self.y, other.y)
                and np.array_equal(self.z, other.z)
                and np.array_equal(self.ra, other.ra)
                and np.array_equal(self.dec, other.dec)
                and np.array_equal(self.r, other.r)
                and np.array_equal(self.w, other.w)
                and np.array_equal(self.wpos, other.wpos)
                and np.array_equal(self.k, other.k)
                and np.array_equal(self.z1, other.z1)
                and np.array_equal(self.z2, other.z2)
                and np.array_equal(self.v1, other.v1)
                and np.array_equal(self.v2, other.v2)
                and np.array_equal(self.g1, other.g1)
                and np.array_equal(self.g2, other.g2)
                and np.array_equal(self.t1, other.t1)
                and np.array_equal(self.t2, other.t2)
                and np.array_equal(self.q1, other.q1)
                and np.array_equal(self.q2, other.q2)
                and np.array_equal(self.patch, other.patch))
def read_catalogs(config, key=None, list_key=None, *, num=0, logger=None, is_rand=None):
    """Read in a list of catalogs for the given key.

    key should be the file_name parameter or similar key word.
    list_key should be the corresponding file_list parameter, if appropriate.
    At least one of key or list_key must be provided.  If both are provided, then only one
    of these should be in the config dict.

    num indicates which key to use if any of the fields like x_col, flip_g1, etc. are lists.
    The default is 0, which means to use the first item in the list if they are lists.

    If the config dict specifies that patches be used, the returned list of Catalogs will be
    a concatenation of the patches for each of the specified names.

    Parameters:
        config (dict):  The configuration dict to use for the appropriate parameters
        key (str):      Which key name to use for the file names. e.g. 'file_name'
                        (default: None)
        list_key (str): Which key name to use for the name of a list file. e.g. 'file_list'.
                        Either key or list_key is required.  (default: None)
        num (int):      Which number catalog does this correspond to. e.g. file_name should
                        use num=0, file_name2 should use num=1.  (default: 0)
        logger:         If desired, a Logger object for logging. (default: None, in which
                        case one will be built according to the config dict's verbose level.)
        is_rand (bool): If this is a random file, then setting is_rand to True will let the
                        catalogs skip k_col, g1_col, and g2_col if they were set for the main
                        catalog.  (default: False)

    Returns:
        A list of Catalogs, or an empty list if no catalogs are specified.
    """
    if logger is None:
        logger = setup_logger(get(config, 'verbose', int, 1), config.get('log_file', None))

    if key is None and list_key is None:
        raise TypeError("Must provide either key or list_key")
    if key is not None and key in config:
        if list_key is not None and list_key in config:
            raise TypeError("Cannot provide both key and list_key")
        file_names = config[key]
    elif list_key is not None and list_key in config:
        list_file = config[list_key]
        with open(list_file, 'r') as fin:
            file_names = [f.strip() for f in fin]
    else:
        # If this key was required (i.e. file_name), then let the caller check this.
        return []
    if is_rand is None:
        if key is not None:
            is_rand = 'rand' in key
        else:
            is_rand = 'rand' in list_key
    if not isinstance(file_names, list):
        file_names = file_names.split()

    ret = []
    for file_name in file_names:
        ret += Catalog(file_name, config, num=num, logger=logger, is_rand=is_rand).get_patches()
    return ret
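
# A sketch of typical use (the file names and columns here are hypothetical):
#
#     >>> config = {'file_name': ['cat1.fits', 'cat2.fits'],
#     ...           'ra_col': 'RA', 'dec_col': 'DEC',
#     ...           'ra_units': 'deg', 'dec_units': 'deg'}
#     >>> cats = treecorr.read_catalogs(config, key='file_name')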
def _compute_var_multi_cat(cat_list, k, low_mem):
    # k is the name of the quantity to get from each catalog, from which we build the
    # attribute names: e.g. vark, _meank, _altmeank.
    vark = 0
    meank = 0
    altmeank = 0
    sumw = 0
    sumw2 = 0
    for cat in cat_list:
        getattr(cat, 'var' + k[0])  # Make sure the _ quantities are computed.
        cat_vark = getattr(cat, '_var' + k)
        cat_meank = getattr(cat, '_mean' + k)
        cat_altmeank = getattr(cat, '_altmean' + k)
        vark += cat_vark * cat.sumw + cat_meank * cat.sumw2 * (2*cat_altmeank - cat_meank)
        meank += cat_meank * cat.sumw
        altmeank += cat_altmeank * cat.sumw2
        sumw += cat.sumw
        sumw2 += cat.sumw2
        if low_mem:
            cat.unload()
    meank /= sumw
    altmeank /= sumw2
    vark = (vark - meank * sumw2 * (2*altmeank - meank)) / sumw
    return vark
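
# Why this works (a sketch, assuming the weighted definitions
# mean = sum(w*k)/sum(w), altmean = sum(w^2*k)/sum(w^2), and
# var = sum(w^2*(k - mean)^2)/sum(w)): expanding each catalog's variance shows that
#
#     var_i*sumw_i + mean_i*sumw2_i*(2*altmean_i - mean_i) = sum_i(w^2*k^2)
#
# so the loop accumulates the raw second moment sum(w^2*k^2) over all catalogs.
# The final line then subtracts the analogous term built from the combined meank and
# altmeank, recovering sum(w^2*(k - meank)^2)/sum(w), i.e. the variance about the
# overall mean rather than about the per-catalog means.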
def calculateVarK(cat_list, *, low_mem=False):
    """Calculate the overall variance of the scalar field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average variance weighted
    by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual vark.  [default: False]

    Returns:
        The variance of the scalar field.
    """
    if isinstance(cat_list, Catalog):
        return cat_list.vark
    elif len(cat_list) == 1:
        return cat_list[0].vark
    else:
        # When building up from multiple catalogs, we need to calculate the overall mean
        # and get the variance around that.  So this is a little complicated.
        # In practice, it probably doesn't matter at all for real data sets, but some of the
        # unit tests have small enough N that this matters.
        return _compute_var_multi_cat(cat_list, 'k', low_mem)
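
# A short sketch (the per-catalog arrays x_i, y_i, k_i are hypothetical):
#
#     >>> cats = [treecorr.Catalog(x=x_i, y=y_i, k=k_i) for x_i, y_i, k_i in data]
#     >>> vark = treecorr.calculateVarK(cats, low_mem=True)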
def calculateVarZ(cat_list, *, low_mem=False):
    """Calculate the overall variance of the complex scalar field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average variance
    (per component) weighted by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual varz.  [default: False]

    Returns:
        The variance per component of the complex scalar field.
    """
    if isinstance(cat_list, Catalog):
        return cat_list.varz
    elif len(cat_list) == 1:
        return cat_list[0].varz
    else:
        varz1 = _compute_var_multi_cat(cat_list, 'z1', low_mem)
        varz2 = _compute_var_multi_cat(cat_list, 'z2', low_mem)
        return (varz1 + varz2)/2.
def calculateVarV(cat_list, *, low_mem=False):
    """Calculate the overall variance of the vector field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average vector variance
    (per component) weighted by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the vector
                    variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual varv.  [default: False]

    Returns:
        The variance per component of the vector field.
    """
    if isinstance(cat_list, Catalog):
        return cat_list.varv
    elif len(cat_list) == 1:
        return cat_list[0].varv
    else:
        varv1 = _compute_var_multi_cat(cat_list, 'v1', low_mem)
        varv2 = _compute_var_multi_cat(cat_list, 'v2', low_mem)
        return (varv1 + varv2)/2.
def calculateVarG(cat_list, *, low_mem=False):
    """Calculate the overall variance of the shear field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average shear variance
    (per component) weighted by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the shear
                    variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual varg.  [default: False]

    Returns:
        The variance per component of the shear field (aka shape noise).
    """
    if isinstance(cat_list, Catalog):
        return cat_list.varg
    elif len(cat_list) == 1:
        return cat_list[0].varg
    else:
        varg1 = _compute_var_multi_cat(cat_list, 'g1', low_mem)
        varg2 = _compute_var_multi_cat(cat_list, 'g2', low_mem)
        return (varg1 + varg2)/2.
def calculateVarT(cat_list, *, low_mem=False):
    """Calculate the overall variance of the trefoil field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average variance
    (per component) weighted by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the trefoil
                    variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual vart.  [default: False]

    Returns:
        The variance per component of the trefoil field.
    """
    if isinstance(cat_list, Catalog):
        return cat_list.vart
    elif len(cat_list) == 1:
        return cat_list[0].vart
    else:
        vart1 = _compute_var_multi_cat(cat_list, 't1', low_mem)
        vart2 = _compute_var_multi_cat(cat_list, 't2', low_mem)
        return (vart1 + vart2)/2.
def calculateVarQ(cat_list, *, low_mem=False):
    """Calculate the overall variance of the quatrefoil field from a list of catalogs.

    The catalogs are assumed to be equivalent, so this is just the average variance
    (per component) weighted by the number of objects in each catalog.

    Parameters:
        cat_list:   A Catalog or a list of Catalogs for which to calculate the quatrefoil
                    variance.
        low_mem:    Whether to try to conserve memory when cat_list is a list by unloading
                    each catalog after getting its individual varq.  [default: False]

    Returns:
        The variance per component of the quatrefoil field.
    """
    if isinstance(cat_list, Catalog):
        return cat_list.varq
    elif len(cat_list) == 1:
        return cat_list[0].varq
    else:
        varq1 = _compute_var_multi_cat(cat_list, 'q1', low_mem)
        varq2 = _compute_var_multi_cat(cat_list, 'q2', low_mem)
        return (varq1 + varq2)/2.
def isKColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the k column.

    The logic here is the same as for `isGColRequired`, but we check for output files that
    require the k column rather than g1,g2.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have a valid k column,
        False if not.
    """
    return config and ('kk_file_name' in config
                       or (num==0 and 'kv_file_name' in config)
                       or (num==0 and 'kg_file_name' in config)
                       or (num==0 and 'kt_file_name' in config)
                       or (num==0 and 'kq_file_name' in config)
                       or (num==1 and 'nk_file_name' in config))
def isZColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the z1,z2
    columns.

    The logic here is the same as for `isGColRequired`, but we check for output files that
    require the z1,z2 columns rather than g1,g2.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have valid z1/z2 columns,
        False if not.
    """
    return config and ('zz_file_name' in config
                       or (num==1 and 'nz_file_name' in config)
                       or (num==1 and 'kz_file_name' in config))
def isVColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the v1,v2
    columns.

    The logic here is the same as for `isGColRequired`, but we check for output files that
    require the v1,v2 columns rather than g1,g2.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have valid v1/v2 columns,
        False if not.
    """
    return config and ('vv_file_name' in config
                       or (num==1 and 'nv_file_name' in config)
                       or (num==1 and 'kv_file_name' in config))
def isGColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the g1,g2
    columns.

    It checks the config dict for the output file names gg_file_name, ng_file_name (only if
    num == 1), etc.  If the output files indicate that we don't need the g1/g2 columns, then
    we don't need to raise an error if the g1_col or g2_col is invalid.

    This makes it easier to specify columns.  e.g. for an NG correlation function, the first
    catalog does not need to have the g1,g2 columns, and typically wouldn't.  So if you
    specify g1_col=5, g2_col=6, say, and the first catalog does not have these columns, you
    would normally get an error.

    But instead, we check that the calculation is going to be NG from the presence of an
    ng_file_name parameter, and we let the would-be error pass.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have valid g1/g2 columns,
        False if not.
    """
    return config and ('gg_file_name' in config
                       or 'm2_file_name' in config
                       or (num==1 and 'norm_file_name' in config)
                       or (num==1 and 'ng_file_name' in config)
                       or (num==1 and 'nm_file_name' in config)
                       or (num==1 and 'kg_file_name' in config))
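
# A short sketch of the behavior for an NG calculation (output file name hypothetical):
#
#     >>> config = {'ng_file_name': 'ng.out'}
#     >>> treecorr.catalog.isGColRequired(config, 0)   # first catalog: counts only
#     False
#     >>> treecorr.catalog.isGColRequired(config, 1)   # second catalog needs g1,g2
#     True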
def isTColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the t1,t2
    columns.

    The logic here is the same as for `isGColRequired`, but we check for output files that
    require the t1,t2 columns rather than g1,g2.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have valid t1/t2 columns,
        False if not.
    """
    return config and ('tt_file_name' in config
                       or (num==1 and 'nt_file_name' in config)
                       or (num==1 and 'kt_file_name' in config))
def isQColRequired(config, num):
    """A quick helper function that checks whether we need to bother reading the q1,q2
    columns.

    The logic here is the same as for `isGColRequired`, but we check for output files that
    require the q1,q2 columns rather than g1,g2.

    Parameters:
        config (dict):  The configuration file to check.
        num (int):      Which number catalog are we working on.

    Returns:
        True if some output file requires this catalog to have valid q1/q2 columns,
        False if not.
    """
    return config and ('qq_file_name' in config
                       or (num==1 and 'nq_file_name' in config)
                       or (num==1 and 'kq_file_name' in config))