Source code for hipscat.pixel_math.partition_stats

"""Utilities for generating and manipulating object count histograms"""

import healpy as hp
import numpy as np
import pandas as pd

from hipscat.pixel_math.healpix_pixel import HealpixPixel


[docs] def empty_histogram(highest_order): """Use numpy to create an histogram array with the right shape, filled with zeros. Args: highest_order (int): the highest healpix order (e.g. 0-10) Returns: one-dimensional numpy array of long integers, where the length is equal to the number of pixels in a healpix map of target order, and all values are set to 0. """ return np.zeros(hp.order2npix(highest_order), dtype=np.int64)
[docs] def generate_histogram( data: pd.DataFrame, highest_order, ra_column="ra", dec_column="dec", ): """Generate a histogram of counts for objects found in `data` Args: data (:obj:`pd.DataFrame`): tabular object data highest_order (int): the highest healpix order (e.g. 0-10) ra_column (str): where in the input to find the celestial coordinate, right ascension dec_column (str): where in the input to find the celestial coordinate, declination Returns: one-dimensional numpy array of long integers where the value at each index corresponds to the number of objects found at the healpix pixel. Raises: ValueError: if the `ra_column` or `dec_column` cannot be found in the input file. """ histogram_result = empty_histogram(highest_order) # Verify that the data frame has columns with desired names. required_columns = [ra_column, dec_column] if not all(x in data.columns for x in required_columns): raise ValueError(f"Invalid column names in input: {ra_column}, {dec_column}") mapped_pixels = hp.ang2pix( 2**highest_order, data[ra_column].values, data[dec_column].values, lonlat=True, nest=True, ) mapped_pixel, count_at_pixel = np.unique(mapped_pixels, return_counts=True) histogram_result[mapped_pixel] += count_at_pixel.astype(np.int64) return histogram_result
[docs] def generate_alignment(histogram, highest_order=10, lowest_order=0, threshold=1_000_000): """Generate alignment from high order pixels to those of equal or lower order We may initially find healpix pixels at order 10, but after aggregating up to the pixel threshold, some final pixels are order 4 or 7. This method provides a map from pixels at order 10 to their destination pixel. This may be used as an input into later partitioning map reduce steps. Args: histogram (:obj:`np.array`): one-dimensional numpy array of long integers where the value at each index corresponds to the number of objects found at the healpix pixel. highest_order (int): the highest healpix order (e.g. 5-10) lowest_order (int): the lowest healpix order (e.g. 1-5). specifying a lowest order constrains the partitioning to prevent spatially large pixels. threshold (int): the maximum number of objects allowed in a single pixel Returns: one-dimensional numpy array of integer 3-tuples, where the value at each index corresponds to the destination pixel at order less than or equal to the `highest_order`. The tuple contains three integers: - order of the destination pixel - pixel number *at the above order* - the number of objects in the pixel Raises: ValueError: if the histogram is the wrong size, or some initial histogram bins exceed threshold. """ if len(histogram) != hp.order2npix(highest_order): raise ValueError("histogram is not the right size") if lowest_order > highest_order: raise ValueError("lowest_order should be less than highest_order") max_bin = np.amax(histogram) if max_bin > threshold: raise ValueError(f"single pixel count {max_bin} exceeds threshold {threshold}") nested_sums = [] for i in range(0, highest_order): nested_sums.append(empty_histogram(i)) nested_sums.append(histogram) # work backward - from highest order, fill in the sums of lower order pixels for read_order in range(highest_order, lowest_order, -1): parent_order = read_order - 1 for index in range(0, len(nested_sums[read_order])): parent_pixel = index >> 2 nested_sums[parent_order][parent_pixel] += nested_sums[read_order][index] nested_alignment = [] for i in range(0, highest_order + 1): nested_alignment.append(np.full(hp.order2npix(i), None)) # work forward - determine if we should map to a lower order pixel, this pixel, or keep looking. for read_order in range(lowest_order, highest_order + 1): parent_order = read_order - 1 for index in range(0, len(nested_sums[read_order])): parent_alignment = None if parent_order >= 0: parent_pixel = index >> 2 parent_alignment = nested_alignment[parent_order][parent_pixel] if parent_alignment: nested_alignment[read_order][index] = parent_alignment elif nested_sums[read_order][index] == 0: continue elif nested_sums[read_order][index] <= threshold: nested_alignment[read_order][index] = ( read_order, index, nested_sums[read_order][index], ) return nested_alignment[highest_order]
[docs] def generate_destination_pixel_map(histogram, pixel_map): """Generate mapping from destination pixel to all the constituent pixels. Args: histogram (:obj:`np.array`): one-dimensional numpy array of long integers where the value at each index corresponds to the number of objects found at the healpix pixel. pixel_map (:obj:`np.array`): one-dimensional numpy array of integer 3-tuples. See :func:`generate_alignment` for more details on this format. Returns: dictionary that maps the integer 3-tuple of a pixel at destination order to the set of indexes in histogram for the pixels at the original healpix order """ # Find all distinct destination pixels non_none_elements = pixel_map[pixel_map != np.array(None)] unique_pixels = np.unique(non_none_elements) # Compute the order from the number of pixels at the level. max_order = hp.npix2order(len(histogram)) result = {} for order, pixel, count in unique_pixels: # Find all constituent pixels at the histogram's order explosion_factor = 4 ** (max_order - int(order)) start_pixel = int(pixel) * explosion_factor end_pixel = (int(pixel) + 1) * explosion_factor non_zero_indexes = np.nonzero(histogram[start_pixel:end_pixel])[0] + start_pixel result[(order, pixel, count)] = non_zero_indexes.tolist() return result
[docs] def compute_pixel_map(histogram, highest_order=10, lowest_order=0, threshold=1_000_000): # pylint: disable=too-many-locals """Generate alignment from high order pixels to those of equal or lower order We may initially find healpix pixels at order 10, but after aggregating up to the pixel threshold, some final pixels are order 4 or 7. This method provides a map from destination healpix pixels at a lower order to the origin pixels at the highest order. This may be used as an input into later partitioning map reduce steps. Args: histogram (:obj:`np.array`): one-dimensional numpy array of long integers where the value at each index corresponds to the number of objects found at the healpix pixel. highest_order (int): the highest healpix order (e.g. 0-10) lowest_order (int): the lowest healpix order (e.g. 1-5). specifying a lowest order constrains the partitioning to prevent spatially large pixels. threshold (int): the maximum number of objects allowed in a single pixel Returns: dictionary that maps the HealpixPixel (a pixel at destination order) to a tuple of origin pixel information. The tuple contains the following: - 0 - the total number of rows found in this destination pixel - 1 - the set of indexes in histogram for the pixels at the original healpix order. Raises: ValueError: if the histogram is the wrong size, or some initial histogram bins exceed threshold. """ if len(histogram) != hp.order2npix(highest_order): raise ValueError("histogram is not the right size") if lowest_order > highest_order: raise ValueError("lowest_order should be less than highest_order") max_bin = np.amax(histogram) if max_bin > threshold: raise ValueError(f"single pixel count {max_bin} exceeds threshold {threshold}") nested_sums = [] for order in range(0, highest_order): explosion_factor = 4 ** (highest_order - order) nested_sums.append(histogram.reshape(-1, explosion_factor).sum(axis=1)) nested_sums.append(histogram) ## Determine the lowest order that does not exceed the threshold orders_at_threshold = [lowest_order if 0 < k <= threshold else None for k in nested_sums[lowest_order]] for order in range(lowest_order + 1, highest_order + 1): new_orders_at_threshold = np.array( [order if 0 < k <= threshold else None for k in nested_sums[order]] ) parent_alignment = np.repeat(orders_at_threshold, 4, axis=0) orders_at_threshold = [ parent_order if parent_order is not None else new_order for parent_order, new_order in zip(parent_alignment, new_orders_at_threshold) ] ## Zip up the orders and the pixel numbers. healpix_pixels = np.array( [ HealpixPixel(order, pixel >> 2 * (highest_order - order)) if order is not None else None for order, pixel in zip(orders_at_threshold, np.arange(0, len(orders_at_threshold))) ] ) ## Create a map from healpix pixel to origin pixel data non_none_elements = healpix_pixels[healpix_pixels != np.array(None)] unique_pixels = np.unique(non_none_elements) result = {} for healpix_pixel in unique_pixels: # Find all nonzero constituent pixels at the histogram's order explosion_factor = 4 ** (highest_order - healpix_pixel.order) start_pixel = healpix_pixel.pixel * explosion_factor end_pixel = (healpix_pixel.pixel + 1) * explosion_factor non_zero_indexes = np.nonzero(histogram[start_pixel:end_pixel])[0] + start_pixel result[healpix_pixel] = ( nested_sums[healpix_pixel.order][healpix_pixel.pixel], non_zero_indexes.tolist(), ) return result
[docs] def generate_constant_pixel_map(histogram, constant_healpix_order): """Special case of creating a destination pixel map where you want to preserve some constant healpix order across the sky. NB: - this method filters out pixels with no counts - no upper thresholds are applied, and single pixel may contain many rows Args: histogram (:obj:`np.array`): one-dimensional numpy array of long integers where the value at each index corresponds to the number of objects found at the healpix pixel. constant_healpix_order (int): the desired healpix order (e.g. 0-10) Returns: dictionary that maps non-empty bins as HealpixPixel to a tuple of origin pixel information. The tuple contains the following: - 0 - the total number of rows found in this destination pixel, same as the origin bin - 1 - the set of indexes in histogram for the pixels at the original healpix order, which will be a list containing only the origin pixel. Raises: ValueError: if the histogram is the wrong size """ if len(histogram) != hp.order2npix(constant_healpix_order): raise ValueError("histogram is not the right size") non_zero_indexes = np.nonzero(histogram)[0] healpix_pixels = [HealpixPixel(constant_healpix_order, pixel) for pixel in non_zero_indexes] value_list = [(histogram[pixel], [pixel]) for pixel in non_zero_indexes] return dict(zip(healpix_pixels, value_list))