Source code for qp.parameterizations.quant.quant

"""This module implements a distribution parameterization sub-class using interpolated quantiles"""

from __future__ import annotations
import logging
import sys

import numpy as np
from scipy.stats import rv_continuous
from typing import Mapping, Optional
from numpy.typing import ArrayLike
import warnings

from .quant_utils import extract_quantiles, pad_quantiles
from ...core.factory import add_class
from ...core.ensemble import Ensemble
from ..base import Pdf_rows_gen
from ...plotting import get_axes_and_xlims, plot_pdf_quantiles_on_axes
from . import (
    AbstractQuantilePdfConstructor,
    CdfSplineDerivative,
    DualSplineAverage,
    PiecewiseConstant,
    PiecewiseLinear,
)
from ...utils.array import reshape_to_pdf_size
from ...utils.interpolation import interpolate_multi_x_y, interpolate_x_multi_y

# Machine epsilon for Python floats, as reported by the interpreter.
epsilon = sys.float_info.epsilon


# Name of the PDF constructor used when the caller does not pick one.
DEFAULT_PDF_CONSTRUCTOR = "piecewise_linear"
# Maps every accepted ``pdf_constructor_name`` to the class that is
# instantiated with ``(quants, locs)`` to build the PDF.
PDF_CONSTRUCTORS = {
    "cdf_spline_derivative": CdfSplineDerivative,
    "dual_spline_average": DualSplineAverage,
    "piecewise_linear": PiecewiseLinear,
    "piecewise_constant": PiecewiseConstant,
}


class quant_gen(Pdf_rows_gen):  # pylint: disable=too-many-instance-attributes
    """Quantile based distribution, where the PDF is defined from the quantiles.

    Parameters
    ----------
    quants : ArrayLike
        The quantiles of the CDF, of shape n
    locs : ArrayLike
        The locations at which those quantiles are reached, of shape (npdf, n)
    pdf_constructor_name : str, optional
        The constructor or interpolator to use to create the PDF, by default
        "piecewise_linear".
    ensure_extent : bool, optional
        If True, will ensure that the quants start at 0 and end at 1 by adding
        data points at both ends until this is true. locs are extrapolated
        linearly from input data. By default True.
    warn : bool, optional
        If True, raises warnings if input is not valid data (i.e. if
        data is not finite). If False, no warnings are raised. By default True.

    Notes
    -----

    Converting to this parameterization:

    This table contains the available methods to convert to this
    parameterization, their required arguments, and their method keys. If the
    key is `None`, this is the default conversion method.

    +---------------------+-----------+------------+
    | Function            | Arguments | Method key |
    +---------------------+-----------+------------+
    |`.extract_quantiles` | quants    | None       |
    +---------------------+-----------+------------+

    Implementation notes:

    This implements a CDF by interpolating a set of quantile values

    It takes a set of quants and locs values and uses
    `scipy.interpolate.interp1d` with a spline interpolation method of order 2
    (kind=`quadratic`) to build the CDF.

    It has multiple PDF constructors to get the PDF from the quantiles. The
    default is the `piecewise_linear` method, which takes the numerical
    derivative of the CDF and interpolates between those points.

    `ppf(0)` returns negative infinity and `ppf(1)` returns positive infinity.
    """

    # pylint: disable=protected-access

    name = "quant"
    version = 0

    _support_mask = rv_continuous._support_mask

    def __init__(
        self,
        quants: ArrayLike,
        locs: ArrayLike,
        pdf_constructor_name: str = DEFAULT_PDF_CONSTRUCTOR,
        ensure_extent: bool = True,
        warn: bool = True,
        *args,
        **kwargs,
    ):
        """
        Create a new distribution using the given values

        Parameters
        ----------
        quants : ArrayLike
            The quantiles of the CDF, of shape n
        locs : ArrayLike
            The locations at which those quantiles are reached, of shape (npdf, n)
        pdf_constructor_name : str, optional
            The constructor to use to create the PDF, by default
            "piecewise_linear". Non-``str`` values (e.g. bytes read back from
            stored metadata) are decoded to ``str`` before being checked.
        ensure_extent : bool, optional
            If True, will ensure that the quants start at 0 and end at 1 by
            adding data points at both ends until this is true. locs are
            extrapolated linearly from input data. By default True.
        warn : bool, optional
            If True, raises warnings if input is not valid data (i.e. if
            data is not finite). If False, no warnings are raised. By default True.

        Raises
        ------
        ValueError
            If the quants/locs do not describe a valid CDF, if their lengths
            disagree, or if ``pdf_constructor_name`` is not a key of
            `PDF_CONSTRUCTORS`.
        """

        # The extent is taken from the locs as given, i.e. before any padding;
        # it is used as the out-of-range fill value in `_ppf`.
        self._xmin = np.min(locs)
        self._xmax = np.max(locs)

        locs_2d = reshape_to_pdf_size(np.asarray(locs), -1)

        # make sure input makes sense for a CDF
        self._validate_input(np.asarray(quants), locs_2d)

        # check locs are finite
        self._warn = warn
        if self._warn:
            non_finite = ~np.isfinite(locs_2d)
            if np.any(non_finite):
                indices = np.where(non_finite)
                warnings.warn(
                    f"There are non-finite values in the locs for the distributions: {indices[0]}",
                    RuntimeWarning,
                )

        self._ensure_extent = ensure_extent
        if self._ensure_extent:
            quants, locs_2d = pad_quantiles(quants, locs_2d)

        self._quants = np.asarray(quants)
        self._nquants = self._quants.size
        if locs_2d.shape[-1] != self._nquants:  # pragma: no cover
            # First count is the locations, second the quantiles, matching
            # the wording of the message.
            raise ValueError(
                "Number of locations (%i) != number of quantile values (%i)"
                % (locs_2d.shape[-1], self._nquants)
            )
        self._locs = locs_2d

        # set up PDF constructor
        if not isinstance(pdf_constructor_name, str):
            try:
                pdf_constructor_name = str(np.strings.decode(pdf_constructor_name))
            except AttributeError:
                # `np.strings` is unavailable: decode bytes directly so the
                # name does not turn into the literal text "b'...'".
                if isinstance(pdf_constructor_name, bytes):
                    pdf_constructor_name = pdf_constructor_name.decode()
                else:
                    pdf_constructor_name = str(pdf_constructor_name)

        if pdf_constructor_name not in PDF_CONSTRUCTORS:
            raise ValueError(
                f"Unknown interpolator provided: '{pdf_constructor_name}'. Allowed interpolators are {list(PDF_CONSTRUCTORS.keys())}"  # pylint: disable=line-too-long
            )

        self._pdf_constructor_name = pdf_constructor_name
        self._pdf_constructor = None
        self._instantiate_pdf_constructor()

        kwargs["shape"] = self._locs.shape  # locs.shape
        super().__init__(*args, **kwargs)

        self._addmetadata("quants", self._quants)
        # The name is stored as bytes in the metadata.
        self._addmetadata("pdf_constructor_name", self._pdf_constructor_name.encode())
        self._addmetadata("ensure_extent", self._ensure_extent)
        self._addobjdata("locs", self._locs)

    def _validate_input(self, quants, locs):
        """Ensures that given input matches criteria for a valid CDF.

        Parameters
        ----------
        quants : np.ndarray
            The quantile values; must lie within [0, 1] and be non-decreasing.
        locs : np.ndarray
            The locations; must be non-decreasing along the last axis.

        Raises
        ------
        ValueError
            If any of the conditions above is violated.
        """
        if np.any(quants < 0) or np.any(quants > 1):
            raise ValueError(
                f"Invalid quants: One or more of the given quants is outside the allowed range (0,1): {quants}"
            )
        if not np.all(np.diff(quants) >= 0):
            raise ValueError(
                f"Invalid quants: \n There are decreasing values, quants must be given in order from 0 to 1: {quants}"
            )
        locs_steps = np.diff(locs)
        if not np.all(locs_steps >= 0):
            indices = np.where(locs_steps < 0)
            raise ValueError(
                f"Invalid locs: \n The given data does not produce a one-to-one CDF for the distributions at the following indices: {indices}"
            )

    @property
    def quants(self) -> np.ndarray[float]:
        """Return quantiles used to build the CDF"""
        return self._quants

    @property
    def locs(self) -> np.ndarray[float]:
        """Return the locations at which those quantiles are reached"""
        return self._locs

    @property
    def pdf_constructor_name(self) -> str:
        """Returns the name of the current pdf constructor. Matches a key in
        the `PDF_CONSTRUCTORS` dictionary."""
        return self._pdf_constructor_name

    @pdf_constructor_name.setter
    def pdf_constructor_name(self, value: str) -> None:
        """Allows users to specify a different interpolator without having to
        recreate the ensemble.

        Parameters
        ----------
        value : str
            One of the supported interpolators. See `PDF_CONSTRUCTORS`
            dictionary for supported interpolators.

        Raises
        ------
        ValueError
            If the value provided isn't a key in `PDF_CONSTRUCTORS`, raise
            a value error.
        """
        if value not in PDF_CONSTRUCTORS:
            raise ValueError(
                f"Unknown interpolator provided: '{value}'. Allowed interpolators are {list(PDF_CONSTRUCTORS.keys())}"  # pylint: disable=line-too-long
            )

        # Compare by value: two equal strings are not guaranteed to be the
        # same object, so an identity check can miss a repeated name.
        if value == self._pdf_constructor_name:
            logging.warning("Already using interpolator: '%s'.", value)
            return

        self._pdf_constructor_name = value
        self._instantiate_pdf_constructor()
        # Store the name as bytes, the same way `__init__` does.
        self._addmetadata("pdf_constructor_name", self._pdf_constructor_name.encode())

    @property
    def pdf_constructor(self) -> AbstractQuantilePdfConstructor:
        """Returns the current PDF constructor, and allows the user to
        interact with its methods.

        Returns
        -------
        AbstractQuantilePdfConstructor
            Abstract base class of the active concrete PDF constructor.
        """
        return self._pdf_constructor

    def _instantiate_pdf_constructor(self):
        """Build the PDF constructor named by ``_pdf_constructor_name`` from
        the current quants and locs."""
        self._pdf_constructor = PDF_CONSTRUCTORS[self._pdf_constructor_name](
            self._quants, self._locs
        )

    def x_samples(self) -> np.ndarray[float]:
        """Return a set of x values that can be used to plot all the CDFs.

        The values are evenly spaced between the smallest and largest loc. The
        number of points is the range divided by the median spacing between
        neighbouring locs, capped at 10000.

        Returns
        -------
        np.ndarray[float]
            The evenly spaced x values.
        """
        max_points = 10000

        # get the range and median distance between points
        min_dx = np.median(np.diff(self._locs))
        min_val = np.min(self._locs)
        max_val = np.max(self._locs)

        if min_dx == 0:
            # Repeated locs give a zero median spacing; dividing by it yields
            # inf/nan, which cannot be converted to a point count. Use the cap.
            return np.linspace(min_val, max_val, max_points)

        # get the number of points (make sure it's less than some huge number)
        npts = min(int((max_val - min_val) // min_dx), max_points)

        return np.linspace(min_val, max_val, npts)

    def _pdf(self, x, *args):
        # We're not requiring that the output be normalized!
        # `util.normalize_interp1d` addresses _one_ of the ways that a reconstruction
        # can be bad, but not all. It should be replaced with a more comprehensive
        # normalization function.
        # See qp issue #147
        row = args[0]
        return self._pdf_constructor.construct_pdf(x, row)

    def _cdf(self, x, row):
        # pylint: disable=arguments-differ
        # Below the first loc the CDF is 0, above the last loc it is 1.
        return interpolate_multi_x_y(
            x,
            row,
            self._locs,
            self._quants,
            bounds_error=False,
            fill_value=(0.0, 1),
            kind="quadratic",
        ).ravel()

    def _ppf(self, x, row):
        # pylint: disable=arguments-differ
        # Outside the stored quants, fall back to the extent of the input locs.
        return interpolate_x_multi_y(
            x,
            row,
            self._quants,
            self._locs,
            bounds_error=False,
            fill_value=(self._xmin, self._xmax),
            kind="quadratic",
        ).ravel()

    def _updated_ctor_param(self):
        """
        Set the quants and locs as additional constructor arguments
        """
        dct = super()._updated_ctor_param()
        dct["quants"] = self._quants
        dct["locs"] = self._locs
        dct["pdf_constructor_name"] = self._pdf_constructor_name
        dct["ensure_extent"] = self._ensure_extent
        dct["warn"] = self._warn
        return dct

    @classmethod
    def get_allocation_kwds(
        cls, npdf, **kwargs
    ) -> dict[str, tuple[tuple[int, int], str]]:
        """Return the kwds necessary to create an `empty` HDF5 file with ``npdf``
        entries for iterative write. We only need to allocate the data columns,
        as the metadata will be written when we finalize the file.

        The number of data columns is calculated based on the length or shape of
        the metadata, ``n``. For example, the number of columns is ``nbins-1``
        for a histogram.

        Parameters
        ----------
        npdf : int
            Total number of distributions that will be written out
        kwargs :
            The keys needed to construct the shape of the data to be written.

        Returns
        -------
        dict[str, tuple[tuple[int, int], str]]
            A dictionary with a key for the objdata, a tuple with the shape of
            that data, and the data type of the data as a string.
            i.e. ``{objdata_key = ((npdf, n), "f4")}``

        Raises
        ------
        ValueError
            Raises an error if the required kwarg quants is not provided.
        """
        # A missing key raises KeyError, not ValueError, so check explicitly
        # and raise the documented error instead of continuing without quants.
        if "quants" not in kwargs:
            raise ValueError("required argument 'quants' not included in kwargs")

        nquants = np.shape(kwargs["quants"])[-1]
        return dict(locs=((npdf, nquants), "f4"))

    @classmethod
    def plot_native(cls, pdf, **kwargs):
        """Plot the PDF in a way that is particular to this type of distribution

        For a quantile this shows the quantiles points.

        Parameters
        ----------
        axes : Axes
            The axes to plot on. Either this or xlim must be provided.
        xlim : tuple[float, float]
            The x-axis limits. Either this or axes must be provided.

        Other Parameters
        ----------------
        npts : int, optional
            The number of x values to create within the limits, by default 101
        kwargs :
            Any remaining keyword arguments are forwarded to
            `plot_pdf_quantiles_on_axes`.

        Returns
        -------
        axes : Axes
            The plot axes.
        """
        axes, xlim, kw = get_axes_and_xlims(**kwargs)
        xvals = np.linspace(xlim[0], xlim[1], kw.pop("npts", 101))
        locs = np.squeeze(pdf.dist.locs[pdf.kwds["row"]])
        quants = np.squeeze(pdf.dist.quants)
        yvals = np.squeeze(pdf.pdf(xvals))
        return plot_pdf_quantiles_on_axes(
            axes, xvals, yvals, quantiles=(quants, locs), **kw
        )

    @classmethod
    def add_mappings(cls) -> None:
        """
        Add this classes mappings to the conversion dictionary
        """
        cls._add_creation_method(cls.create, None)
        cls._add_extraction_method(extract_quantiles, None)

    @classmethod
    def create_ensemble(
        cls,
        quants: ArrayLike,
        locs: ArrayLike,
        pdf_constructor_name: str = DEFAULT_PDF_CONSTRUCTOR,
        ensure_extent: bool = True,
        warn: bool = True,
        ancil: Optional[Mapping] = None,
    ) -> Ensemble:
        """Creates an Ensemble of distributions parameterized as quantiles.

        The options for pdf_constructor_name are: `piecewise_linear`,
        `piecewise_constant`, `dual_spline_average` and `cdf_spline_derivative`.

        Parameters
        ----------
        quants : ArrayLike
            The quantiles used to build the CDF, shape n
        locs : ArrayLike
            The locations at which those quantiles are reached, shape (npdfs, n),
            where npdfs is the number of distributions.
        pdf_constructor_name : str, optional
            The constructor to use to create the PDF, by default "piecewise_linear".
        ensure_extent : bool, optional
            If True, will ensure that the quants start at 0 and end at 1 by
            adding data points at both ends until this is true. locs are
            extrapolated linearly from input data. By default True.
        warn : bool, optional
            If True, raises warnings if input is not valid (i.e. if locs are
            not finite values). If False, no warnings are raised. By default True.
        ancil : Optional[Mapping], optional
            A dictionary of metadata for the distributions, where any arrays
            have the same length as the number of distributions, by default None

        Returns
        -------
        Ensemble
            An Ensemble object containing all of the given distributions.

        Examples
        --------

        To create an Ensemble with two distributions and associated ids, using
        the `dual_spline_average` constructor:

        >>> import qp
        >>> import numpy as np
        >>> quants = np.array([0.0001,0.25,0.5,0.75,0.9999])
        >>> locs = np.array([[0.0001,0.1,0.3,0.5,0.75],[0.01,0.05,0.15,0.3,0.5]])
        >>> pdf_constructor_name = 'dual_spline_average'
        >>> ancil = {'ids':[11,18]}
        >>> ens = qp.quant.create_ensemble(quants,locs,pdf_constructor_name,ancil=ancil)
        >>> ens.metadata
        {'pdf_name': array([b'quant'], dtype='|S5'),
        'pdf_version': array([0]),
        'quants': array([[0.000e+00, 1.000e-04, 2.500e-01, 5.000e-01, 7.500e-01, 9.999e-01, 1.000e+00]]),
        'pdf_constructor_name': array([b'dual_spline_average'], dtype='|S19'),
        'ensure_extent': array([ True])}

        """
        data = {
            "quants": quants,
            "locs": locs,
            "pdf_constructor_name": pdf_constructor_name,
            "ensure_extent": ensure_extent,
            "warn": warn,
        }
        return Ensemble(cls, data, ancil)
# Module-level alias for the generator class.
quant = quant_gen

# Hand the class to `add_class` from the core factory module
# (presumably this registers it as an available parameterization; confirm there).
add_class(quant_gen)