Source code for qp.hist_pdf

"""This module implements a PDT distribution sub-class using histograms
"""


import numpy as np

from scipy.stats import rv_continuous

from qp.pdf_gen import Pdf_rows_gen
from qp.conversion_funcs import extract_hist_values, extract_hist_samples
from qp.plotting import get_axes_and_xlims, plot_pdf_histogram_on_axes
from qp.utils import (
    evaluate_hist_x_multi_y,
    interpolate_multi_x_y,
    interpolate_x_multi_y,
    reshape_to_pdf_size,
)
from qp.test_data import XBINS, HIST_DATA, TEST_XVALS, NSAMPLES
from qp.factory import add_class


[docs]class hist_gen(Pdf_rows_gen):
    """Histogram based distribution

    Notes
    -----
    This implements a PDF using a set of histogramed values.

    The relevant data members are:

    bins:  n+1 bin edges (shared for all PDFs)

    pdfs:  (npdf, n) bin values

    Inside a given bin the pdf() will return the pdf value.
    Outside the range bins[0], bins[-1] the pdf() will return 0.

    Inside a given bin the cdf() will use a linear interpolation accross the bin
    Outside the range bins[0], bins[-1] the cdf() will return (0 or 1), respectively

    The ppf() is computed by inverting the cdf().
    ppf(0) will return bins[0]
    ppf(1) will return bins[-1]
    """

    # pylint: disable=protected-access

    name = "hist"
    version = 0

    _support_mask = rv_continuous._support_mask

    def __init__(self, bins, pdfs, *args, **kwargs):
        """
        Create a new distribution using the given histogram

        Parameters
        ----------
        bins : array_like
          The array containing the (n+1) bin boundaries

        pdfs : array_like
          The array containing the (npdf, n) bin values
        """
        self._hbins = np.asarray(bins)
        self._nbins = self._hbins.size - 1
        self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
        self._xmin = self._hbins[0]
        self._xmax = self._hbins[-1]
        if np.shape(pdfs)[-1] != self._nbins:  # pragma: no cover
            raise ValueError(
                "Number of bins (%i) != number of values (%i)"
                % (self._nbins, np.shape(pdfs)[-1])
            )

        check_input = kwargs.pop("check_input", True)
        if check_input:
            pdfs_2d = reshape_to_pdf_size(pdfs, -1)
            sums = np.sum(pdfs_2d * self._hbin_widths, axis=1)
            self._hpdfs = (pdfs_2d.T / sums).T
        else:  # pragma: no cover
            self._hpdfs = reshape_to_pdf_size(pdfs, -1)
        self._hcdfs = None
        # Set support
        kwargs["shape"] = pdfs.shape[:-1]
        super().__init__(*args, **kwargs)
        self._addmetadata("bins", self._hbins)
        self._addobjdata("pdfs", self._hpdfs)

    def _compute_cdfs(self):
        copy_shape = np.array(self._hpdfs.shape)
        copy_shape[-1] += 1
        self._hcdfs = np.ndarray(copy_shape)
        self._hcdfs[:, 0] = 0.0
        self._hcdfs[:, 1:] = np.cumsum(self._hpdfs * self._hbin_widths, axis=1)

    @property
    def bins(self):
        """Return the histogram bin edges"""
        return self._hbins

    @property
    def pdfs(self):
        """Return the histogram bin values"""
        return self._hpdfs

    def _pdf(self, x, row):
        # pylint: disable=arguments-differ
        return evaluate_hist_x_multi_y(x, row, self._hbins, self._hpdfs).ravel()

    def _cdf(self, x, row):
        # pylint: disable=arguments-differ
        if self._hcdfs is None:  # pragma: no cover
            self._compute_cdfs()
        return interpolate_x_multi_y(
            x, row, self._hbins, self._hcdfs, bounds_error=False, fill_value=(0.0, 1.0)
        ).ravel()

    def _ppf(self, x, row):
        # pylint: disable=arguments-differ
        if self._hcdfs is None:  # pragma: no cover
            self._compute_cdfs()
        return interpolate_multi_x_y(
            x,
            row,
            self._hcdfs,
            self._hbins,
            bounds_error=False,
            fill_value=(self._xmin, self._xmax),
        ).ravel()

    def _munp(self, m, *args):
        """compute moments"""
        # pylint: disable=arguments-differ
        # Silence floating point warnings from integration.
        with np.errstate(all="ignore"):
            vals = self.custom_generic_moment(m)
        return vals

[docs]    def custom_generic_moment(self, m):
        """Compute the mth moment"""
        m = np.asarray(m)
        dx = self._hbins[1] - self._hbins[0]
        xv = 0.5 * (self._hbins[1:] + self._hbins[:-1])
        return np.sum(xv**m * self._hpdfs, axis=1) * dx

    def _updated_ctor_param(self):
        """
        Set the bins as additional constructor argument
        """
        dct = super()._updated_ctor_param()
        dct["bins"] = self._hbins
        dct["pdfs"] = self._hpdfs
        return dct

[docs]    @classmethod
    def get_allocation_kwds(cls, npdf, **kwargs):
        if "bins" not in kwargs:  # pragma: no cover
            raise ValueError("required argument 'bins' not included in kwargs")
        nbins = len(kwargs["bins"].flatten())
        return dict(pdfs=((npdf, nbins - 1), "f4"))

[docs]    @classmethod
    def plot_native(cls, pdf, **kwargs):
        """Plot the PDF in a way that is particular to this type of distibution

        For a histogram this shows the bin edges
        """
        axes, _, kw = get_axes_and_xlims(**kwargs)
        vals = pdf.dist.pdfs[pdf.kwds["row"]]
        return plot_pdf_histogram_on_axes(axes, hist=(pdf.dist.bins, vals), **kw)

[docs]    @classmethod
    def add_mappings(cls):
        """
        Add this classes mappings to the conversion dictionary
        """
        cls._add_creation_method(cls.create, None)
        cls._add_extraction_method(extract_hist_values, None)
        cls._add_extraction_method(extract_hist_samples, "samples")

[docs]    @classmethod
    def make_test_data(cls):
        """Make data for unit tests"""
        cls.test_data = dict(
            hist=dict(
                gen_func=hist,
                ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),
                convert_data=dict(bins=XBINS),
                atol_diff=1e-1,
                atol_diff2=1e-1,
                test_xvals=TEST_XVALS,
            ),
            hist_samples=dict(
                gen_func=hist,
                ctor_data=dict(bins=XBINS, pdfs=HIST_DATA),
                convert_data=dict(bins=XBINS, method="samples", size=NSAMPLES),
                atol_diff=1e-1,
                atol_diff2=1e-1,
                test_xvals=TEST_XVALS,
                do_samples=True,
            ),
        )


hist = hist_gen.create
add_class(hist_gen)
Source code for qp.hist_pdf

Table of Contents

Related Topics