Source code for hpmcm.shear_match

from __future__ import annotations

from typing import Any

import numpy as np
import pandas

from . import input_tables, output_tables
from .cell import CellData, ShearCellData
from .match import Match


# Geometry of the cell-based coadds.  These values are hard-coded here, so
# they must be updated by hand whenever the tract / cell layout changes.
TRACT_SIZE = np.array([30_000, 30_000])  # handed to Match as ``n_pixels``
PIXEL_SIZE = 0.2 / 3600.0  # presumably 0.2 arcsec in degrees -- TODO confirm
CELL_INNER_SIZE = 150  # handed to Match as ``cell_size``
CELL_BUFFER = 25  # handed to Match as ``cell_buffer``


class ShearMatch(Match):
    """Class to do N-way matching for shear calibration.

    Uses pre-assigned pixel locations from cell-based coadd WCS.

    Since the pixel locations and cells are pre-assigned, the only
    configurable parameters this class takes are the attributes listed
    here.  The pixel_match_scale can be used to allow for matching sources
    that are separated by more than 1 pixel.

    Expects 5 input catalogs: a reference catalog and 4 counterfactual
    shear catalogs.

    Attributes
    ----------
    pixel_match_scale: int
        Number of pixels to merge in original counts map

    cat_type: str
        Shear catalog type

    deshear: float | None
        Deshearing parameter, -1*applied shear.
        None -> deshearing is not done.

    Notes
    -----
    This expects a list of parquet files with pandas DataFrames
    that contain the following columns.

    +--------------+---------------------------------------------------------------+
    | Column name  | Description                                                   |
    +==============+===============================================================+
    | id           | source ID                                                     |
    +--------------+---------------------------------------------------------------+
    | tract        | Tract being matched                                           |
    +--------------+---------------------------------------------------------------+
    | x_cell_coadd | X-position in cell-based coadd used for metadetect            |
    +--------------+---------------------------------------------------------------+
    | y_cell_coadd | Y-position in cell-based coadd used for metadetect            |
    +--------------+---------------------------------------------------------------+
    | snr          | Signal-to-Noise of source, used for filtering and centroiding |
    +--------------+---------------------------------------------------------------+
    | cell_idx_x   | Cell x-index within Tract                                     |
    +--------------+---------------------------------------------------------------+
    | cell_idx_y   | Cell y-index within Tract                                     |
    +--------------+---------------------------------------------------------------+
    | g_1          | Shear g1 component                                            |
    +--------------+---------------------------------------------------------------+
    | g_2          | Shear g2 component                                            |
    +--------------+---------------------------------------------------------------+

    (see :py:class:`hpmcm.input_tables.ShearCoaddSourceTable`)

    These parquet files can be generated from files with the
    following columns using the ShearMatch.splitByTypeAndClean() function.

    +---------------------------------+---------------------------------------+
    | Column name                     | Description                           |
    +=================================+=======================================+
    | id                              | source ID                             |
    +---------------------------------+---------------------------------------+
    | shear_type                      | one of "ns", "1p", "1m", "2p", "2m"   |
    +---------------------------------+---------------------------------------+
    | patch_{x,y}                     | id of the patch within the tract      |
    +---------------------------------+---------------------------------------+
    | cell_{x,y}                      | id of the cell within the patch       |
    +---------------------------------+---------------------------------------+
    | snr                             | Signal-to-Noise of source             |
    +---------------------------------+---------------------------------------+
    | {cat_type}_band_flux_{band}     | Flux measurement in reference band    |
    +---------------------------------+---------------------------------------+
    | {cat_type}_band_flux_err_{band} | Flux error in the reference band      |
    +---------------------------------+---------------------------------------+
    | {cat_type}_g_{i}                | Shear measurements                    |
    +---------------------------------+---------------------------------------+

    Two additional tables are produced beyond the tables produced by the
    base :py:class:`hpmcm.Match` class

    +----------------+---------------------------------------------------+
    | Key            | Class                                             |
    +================+===================================================+
    | _object_shear  | :py:class:`hpmcm.output_tables.ShearTable`        |
    +----------------+---------------------------------------------------+
    | _cluster_shear | :py:class:`hpmcm.output_tables.ShearTable`        |
    +----------------+---------------------------------------------------+
    """

    inputTableClass: type = input_tables.ShearCoaddSourceTable
    extraCols: list[str] = ["ra", "dec", "x_pix", "y_pix", "g_1", "g_2"]

    def __init__(
        self,
        **kwargs: Any,
    ):
        """Store the shear-specific options, then initialise the base class.

        Parameters
        ----------
        kwargs:
            ``pixel_match_scale`` (default 1), ``cat_type`` (default
            ``"wmom"``) and ``deshear`` (default None) are read here.
            The complete, unmodified keyword set is then forwarded to
            the `Match` constructor.
        """
        self.pixel_match_scale: int = kwargs.get("pixel_match_scale", 1)
        # The catalog type used to be read only from the camelCase key
        # ``catalogType``, so ``cat_type=...`` (the documented attribute
        # name) was silently ignored.  Accept both; ``cat_type`` wins.
        self.cat_type: str = kwargs.get(
            "cat_type", kwargs.get("catalogType", "wmom")
        )
        self.deshear: float | None = kwargs.get("deshear", None)
        super().__init__(**kwargs)

    @classmethod
    def createShearMatch(
        cls,
        **kwargs: Any,
    ) -> ShearMatch:
        """Helper function to create a `ShearMatch` object

        This will use the pixel-coordinates read from the input
        shear tables.

        Parameters
        ----------
        kwargs:
            Passed directly to `ShearMatch` constructor.

        Returns
        -------
        Object to create matches for the requested region

        Raises
        ------
        TypeError
            If ``kwargs`` repeats one of the preset keys (``pixel_size``,
            ``n_pixels``, ``cell_size``, ``cell_buffer``,
            ``cell_max_object``).
        """
        preset = dict(
            pixel_size=PIXEL_SIZE,
            # Hand over a copy so that no instance aliases the module-level
            # TRACT_SIZE array (an in-place edit would otherwise leak into
            # every later match).
            n_pixels=TRACT_SIZE.copy(),
            cell_size=CELL_INNER_SIZE,
            cell_buffer=CELL_BUFFER,
            cell_max_object=1000,
        )
        return cls(**preset, **kwargs)

    def getCellIndices(
        self,
        df: pandas.DataFrame,
    ) -> np.ndarray:
        """Get the cell index associated to each source

        The index is ``n_cell[1] * cell_idx_x + cell_idx_y`` cast to int,
        where ``n_cell`` is an attribute this class does not set itself.

        NOTE(review): the value returned is the pandas Series produced by
        the column arithmetic (it keeps the index of ``df``), not a bare
        ``np.ndarray`` as annotated.
        """
        flat_index = self.n_cell[1] * df["cell_idx_x"] + df["cell_idx_y"]
        return flat_index.astype(int)

    def _buildCellData(
        self,
        id_offset: int,
        corner: np.ndarray,
        size: np.ndarray,
        idx: int,
    ) -> CellData:
        """Build the shear-aware cell container, using ``self.cell_buffer``."""
        return ShearCellData(self, id_offset, corner, size, idx, self.cell_buffer)

    def extractShearStats(self) -> list[pandas.DataFrame]:
        """Extract shear stats

        This will produce two :py:class:`hpmcm.output_tables.ShearTable`
        data frames by concatenating the per-cell tables of every cell
        present in ``self.cell_dict``.

        Returns
        -------
        Two-element list: the cluster shear table first, the object shear
        table second.  If no cell is present, both entries are empty
        DataFrames (``pandas.concat`` would raise ``ValueError`` on an
        empty list).
        """
        cluster_tables: list[pandas.DataFrame] = []
        object_tables: list[pandas.DataFrame] = []

        for ix in range(int(self.n_cell[0])):
            for iy in range(int(self.n_cell[1])):
                i_cell = self.getCellIdx(ix, iy)
                if i_cell not in self.cell_dict:
                    continue
                cell_data = self.cell_dict[i_cell]
                # Narrow the type for static checkers.
                assert isinstance(cell_data, ShearCellData)
                cluster_tables.append(
                    output_tables.ShearTable.buildClusterShearStats(cell_data).data
                )
                object_tables.append(
                    output_tables.ShearTable.buildObjectShearStats(cell_data).data
                )

        return [
            pandas.concat(tables) if tables else pandas.DataFrame()
            for tables in (cluster_tables, object_tables)
        ]

    def _getPixValues(self, df: pandas.DataFrame) -> tuple[np.ndarray, np.ndarray]:
        """Return the ``x_pix`` and ``y_pix`` columns of ``df`` as arrays."""
        return df["x_pix"].values, df["y_pix"].values

    def reduceDataFrame(
        self,
        df: pandas.DataFrame,
    ) -> pandas.DataFrame:
        """Reduce a single input DataFrame

        Notes
        -----
        This applies a trivial cut on signal-to-noise (snr>1).

        The returned DataFrame is an independent copy that keeps only
        these columns of the input

        +--------------+-------------------------------------+
        | Column       | Description                         |
        +==============+=====================================+
        | id           | Index of object inside catalog      |
        +--------------+-------------------------------------+
        | ra           | Source RA                           |
        +--------------+-------------------------------------+
        | dec          | Source DEC                          |
        +--------------+-------------------------------------+
        | cell_idx_x   | X-index of Cell                     |
        +--------------+-------------------------------------+
        | cell_idx_y   | Y-index of Cell                     |
        +--------------+-------------------------------------+
        | x_cell_coadd | X-coordinate in cell frame          |
        +--------------+-------------------------------------+
        | y_cell_coadd | Y-coordinate in cell frame          |
        +--------------+-------------------------------------+
        | x_pix        | X-coordinate in global WCS frame    |
        +--------------+-------------------------------------+
        | y_pix        | Y-coordinate in global WCS frame    |
        +--------------+-------------------------------------+
        | g_1          | Shear g_1 component estimate        |
        +--------------+-------------------------------------+
        | g_2          | Shear g_2 component estimate        |
        +--------------+-------------------------------------+
        | snr          | Signal-to-noise ratio               |
        +--------------+-------------------------------------+
        """
        kept_columns = [
            "id",
            "ra",
            "dec",
            "x_pix",
            "y_pix",
            "x_cell_coadd",
            "y_cell_coadd",
            "snr",
            "g_1",
            "g_2",
            "cell_idx_x",
            "cell_idx_y",
        ]
        # Select rows and columns in one step so only the kept columns are
        # copied; rows whose snr is NaN fail the comparison and are dropped.
        return df.loc[df["snr"] > 1, kept_columns].copy()