Source code for hipscat.io.validation

from typing import Any, Dict, Union

from hipscat.catalog.dataset.catalog_info_factory import from_catalog_dir
from hipscat.io import get_parquet_metadata_pointer, get_partition_info_pointer
from hipscat.io.file_io.file_pointer import FilePointer, is_regular_file


def is_valid_catalog(pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None) -> bool:
    """Decide whether the base catalog directory at ``pointer`` passes validation.

    The catalog_info check must succeed. On top of that, at least one of the
    partition_info check or the _metadata check must succeed.

    Args:
        pointer (FilePointer): pointer to base catalog directory
        storage_options: dictionary that contains abstract filesystem credentials

    Returns:
        True if catalog_info is valid and either the partition_info check or the
        _metadata check passes, False otherwise
    """
    # Guard clause: when catalog_info fails, neither of the other checks is run.
    if not is_catalog_info_valid(pointer, storage_options=storage_options):
        return False

    # The _metadata check only runs when the partition_info check did not pass.
    return is_partition_info_valid(pointer, storage_options=storage_options) or is_metadata_valid(
        pointer, storage_options=storage_options
    )
def is_catalog_info_valid(pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None) -> bool:
    """Check whether catalog info can be loaded from a base catalog directory.

    Loading is attempted with ``from_catalog_dir``; the loaded object itself is
    discarded, only success or failure matters.

    Args:
        pointer (FilePointer): pointer to base catalog directory
        storage_options: dictionary that contains abstract filesystem credentials

    Returns:
        True if ``from_catalog_dir`` completes without raising, False if it raises
        FileNotFoundError, ValueError or NotImplementedError. Any other exception
        is not handled here and propagates to the caller.
    """
    try:
        from_catalog_dir(pointer, storage_options=storage_options)
    except (FileNotFoundError, ValueError, NotImplementedError):
        # NOTE(review): presumably these correspond to a missing file, malformed
        # contents and an unsupported catalog type respectively -- confirm
        # against from_catalog_dir.
        return False
    return True
def is_partition_info_valid(
    pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> bool:
    """Check for the partition_info file under a base catalog directory.

    The location is resolved with ``get_partition_info_pointer`` and tested with
    ``is_regular_file``. The file is not opened or parsed by this function.

    Args:
        pointer (FilePointer): pointer to base catalog directory
        storage_options: dictionary that contains abstract filesystem credentials

    Returns:
        The result of ``is_regular_file`` for the partition_info location:
        True if the partition_info file exists, False otherwise
    """
    return is_regular_file(get_partition_info_pointer(pointer), storage_options=storage_options)
def is_metadata_valid(pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None) -> bool:
    """Check for the parquet _metadata file under a base catalog directory.

    The location is resolved with ``get_parquet_metadata_pointer`` and tested
    with ``is_regular_file``. The file is not opened or parsed by this function.

    Args:
        pointer (FilePointer): pointer to base catalog directory
        storage_options: dictionary that contains abstract filesystem credentials

    Returns:
        The result of ``is_regular_file`` for the _metadata location:
        True if the _metadata file exists, False otherwise
    """
    return is_regular_file(get_parquet_metadata_pointer(pointer), storage_options=storage_options)