Source code for qp.hist_pdf

"""This module implements a PDT distribution sub-class using histograms
"""


import numpy as np

from scipy.stats import rv_continuous

from qp.pdf_gen import Pdf_rows_gen
from qp.conversion_funcs import extract_hist_values, extract_hist_samples
from qp.plotting import get_axes_and_xlims, plot_pdf_histogram_on_axes
from qp.utils import (
    evaluate_hist_x_multi_y,
    interpolate_multi_x_y,
    interpolate_x_multi_y,
    reshape_to_pdf_size,
)
from qp.test_data import XBINS, HIST_DATA, TEST_XVALS, NSAMPLES
from qp.factory import add_class


[docs]class hist_gen(Pdf_rows_gen): """Histogram based distribution Notes ----- This implements a PDF using a set of histogramed values. The relevant data members are: bins: n+1 bin edges (shared for all PDFs) pdfs: (npdf, n) bin values Inside a given bin the pdf() will return the pdf value. Outside the range bins[0], bins[-1] the pdf() will return 0. Inside a given bin the cdf() will use a linear interpolation accross the bin Outside the range bins[0], bins[-1] the cdf() will return (0 or 1), respectively The ppf() is computed by inverting the cdf(). ppf(0) will return bins[0] ppf(1) will return bins[-1] """ # pylint: disable=protected-access name = "hist" version = 0 _support_mask = rv_continuous._support_mask def __init__(self, bins, pdfs, *args, **kwargs): """ Create a new distribution using the given histogram Parameters ---------- bins : array_like The array containing the (n+1) bin boundaries pdfs : array_like The array containing the (npdf, n) bin values """ self._hbins = np.asarray(bins) self._nbins = self._hbins.size - 1 self._hbin_widths = self._hbins[1:] - self._hbins[:-1] self._xmin = self._hbins[0] self._xmax = self._hbins[-1] if np.shape(pdfs)[-1] != self._nbins: # pragma: no cover raise ValueError( "Number of bins (%i) != number of values (%i)" % (self._nbins, np.shape(pdfs)[-1]) ) check_input = kwargs.pop("check_input", True) if check_input: pdfs_2d = reshape_to_pdf_size(pdfs, -1) sums = np.sum(pdfs_2d * self._hbin_widths, axis=1) self._hpdfs = (pdfs_2d.T / sums).T else: # pragma: no cover self._hpdfs = reshape_to_pdf_size(pdfs, -1) self._hcdfs = None # Set support kwargs["shape"] = pdfs.shape[:-1] super().__init__(*args, **kwargs) self._addmetadata("bins", self._hbins) self._addobjdata("pdfs", self._hpdfs) def _compute_cdfs(self): copy_shape = np.array(self._hpdfs.shape) copy_shape[-1] += 1 self._hcdfs = np.ndarray(copy_shape) self._hcdfs[:, 0] = 0.0 self._hcdfs[:, 1:] = np.cumsum(self._hpdfs * self._hbin_widths, axis=1) @property def bins(self): """Return the histogram bin edges""" return self._hbins @property def pdfs(self): """Return the histogram bin values""" return self._hpdfs def _pdf(self, x, row): # pylint: disable=arguments-differ return evaluate_hist_x_multi_y(x, row, self._hbins, self._hpdfs).ravel() def _cdf(self, x, row): # pylint: disable=arguments-differ if self._hcdfs is None: # pragma: no cover self._compute_cdfs() return interpolate_x_multi_y( x, row, self._hbins, self._hcdfs, bounds_error=False, fill_value=(0.0, 1.0) ).ravel() def _ppf(self, x, row): # pylint: disable=arguments-differ if self._hcdfs is None: # pragma: no cover self._compute_cdfs() return interpolate_multi_x_y( x, row, self._hcdfs, self._hbins, bounds_error=False, fill_value=(self._xmin, self._xmax), ).ravel() def _munp(self, m, *args): """compute moments""" # pylint: disable=arguments-differ # Silence floating point warnings from integration. with np.errstate(all="ignore"): vals = self.custom_generic_moment(m) return vals
[docs] def custom_generic_moment(self, m): """Compute the mth moment""" m = np.asarray(m) dx = self._hbins[1] - self._hbins[0] xv = 0.5 * (self._hbins[1:] + self._hbins[:-1]) return np.sum(xv**m * self._hpdfs, axis=1) * dx
def _updated_ctor_param(self): """ Set the bins as additional constructor argument """ dct = super()._updated_ctor_param() dct["bins"] = self._hbins dct["pdfs"] = self._hpdfs return dct
[docs] @classmethod def get_allocation_kwds(cls, npdf, **kwargs): if "bins" not in kwargs: # pragma: no cover raise ValueError("required argument 'bins' not included in kwargs") nbins = len(kwargs["bins"].flatten()) return dict(pdfs=((npdf, nbins - 1), "f4"))
[docs] @classmethod def plot_native(cls, pdf, **kwargs): """Plot the PDF in a way that is particular to this type of distibution For a histogram this shows the bin edges """ axes, _, kw = get_axes_and_xlims(**kwargs) vals = pdf.dist.pdfs[pdf.kwds["row"]] return plot_pdf_histogram_on_axes(axes, hist=(pdf.dist.bins, vals), **kw)
[docs] @classmethod def add_mappings(cls): """ Add this classes mappings to the conversion dictionary """ cls._add_creation_method(cls.create, None) cls._add_extraction_method(extract_hist_values, None) cls._add_extraction_method(extract_hist_samples, "samples")
[docs] @classmethod def make_test_data(cls): """Make data for unit tests""" cls.test_data = dict( hist=dict( gen_func=hist, ctor_data=dict(bins=XBINS, pdfs=HIST_DATA), convert_data=dict(bins=XBINS), atol_diff=1e-1, atol_diff2=1e-1, test_xvals=TEST_XVALS, ), hist_samples=dict( gen_func=hist, ctor_data=dict(bins=XBINS, pdfs=HIST_DATA), convert_data=dict(bins=XBINS, method="samples", size=NSAMPLES), atol_diff=1e-1, atol_diff2=1e-1, test_xvals=TEST_XVALS, do_samples=True, ), )
hist = hist_gen.create add_class(hist_gen)