Source code for rainfallqc.core.all_qc_checks

# -*- coding: utf-8 -*-
"""Register for QC checks in RainfallQC."""

import functools
import inspect
import itertools

import polars as pl

from rainfallqc.utils import data_utils

# Registry of QC checks: maps the name passed to ``qc_check`` to the wrapped
# check function. Entries are added when the ``qc_check`` decorator is applied.
QC_CHECKS = {}


def qc_check(name: str, require_non_negative: bool = False) -> callable:
    """
    Build a decorator that registers a QC check under ``name``.

    The decorated function is wrapped so that, on every call, the column names
    passed via ``target_gauge_col`` (mandatory) and, when present in the call
    arguments, ``nearest_neighbour`` and ``list_of_nearest_stations`` are
    collected. If ``require_non_negative`` is set, each collected column is
    passed to ``data_utils.check_for_negative_values`` before the check runs.

    Parameters
    ----------
    name :
        Name of the QC check; used as the key in ``QC_CHECKS`` and in error messages.
    require_non_negative :
        If True, check that the collected gauge columns have no negative values
        before running the QC check.

    Returns
    -------
    callable :
        Decorator that wraps the QC check and stores the wrapper in ``QC_CHECKS``.

    Raises
    ------
    ValueError :
        Raised by the wrapper when ``target_gauge_col`` (or a present neighbour
        argument) resolves to None, or when ``require_non_negative`` is True and
        ``data_utils.check_for_negative_values`` reports a hit for a column.

    """

    def decorator(func: callable) -> callable:
        @functools.wraps(func)
        def wrapper(df: pl.DataFrame, *args, **kwargs) -> list:
            # Resolve the call against the wrapped function's signature so that
            # arguments left at their default values are visible as well.
            call_binding = inspect.signature(func).bind_partial(df, *args, **kwargs)
            call_binding.apply_defaults()
            resolved_args = call_binding.arguments

            # The target gauge column must always be supplied.
            gathered = get_columns_in_kwargs(
                resolved_args, kwarg_name="target_gauge_col", column_list=[], name=name
            )

            # Neighbour arguments are only inspected when the check declares them.
            for neighbour_kwarg in ("nearest_neighbour", "list_of_nearest_stations"):
                if neighbour_kwarg not in resolved_args:
                    continue
                gathered = get_columns_in_kwargs(
                    resolved_args, kwarg_name=neighbour_kwarg, column_list=gathered, name=name
                )

            # Flatten one level: list_of_nearest_stations arrives as a list of names.
            flat_columns = []
            for entry in gathered:
                if isinstance(entry, list):
                    flat_columns.extend(entry)
                else:
                    flat_columns.append(entry)

            # Optional non-negative pre-check
            if require_non_negative:
                for column in flat_columns:
                    if data_utils.check_for_negative_values(df, column):
                        raise ValueError(f"{name} failed: column '{column}' contains negative values.")

            # Run the actual QC check with the caller's original arguments
            return func(df, *args, **kwargs)

        # Register for later use
        QC_CHECKS[name] = wrapper
        return wrapper

    return decorator


def get_columns_in_kwargs(kwargs: dict, kwarg_name: str, column_list: list, name: str) -> list:
    """
    Look up a column argument and append its value to a running list.

    Parameters
    ----------
    kwargs :
        Dictionary of keyword arguments.
    kwarg_name :
        Name of the kwarg whose value should be collected.
    column_list :
        List that the value is appended to (modified in place).
    name :
        Name of the QC check (for error messages).

    Returns
    -------
    list :
        The same ``column_list`` object, with the kwarg's value appended.

    Raises
    ------
    ValueError :
        If ``kwarg_name`` is missing from ``kwargs`` or its value is None.

    """
    requested = kwargs.get(kwarg_name)  # user defined value for that kwarg
    if requested is not None:
        column_list.append(requested)
        return column_list
    raise ValueError(f"The QC check '{name}' requires the '{kwarg_name}' to be set.")