# Source code for qp.conversion_funcs
"""This module implements functions to convert distributions between various representations
These functions should then be registered with the `qp.ConversionDict` using `qp_add_mapping`.
That will allow the automated conversion mechanisms to work.
"""
import numpy as np
from scipy import integrate as sciint
from scipy import interpolate as sciinterp
from .lazy_modules import mixture
from .sparse_rep import (
build_sparse_representation,
decode_sparse_indices,
indices2shapes,
)
def extract_vals_at_x(in_dist, **kwargs):
    """Convert a distribution to a grid of x locations and the pdf values there.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `xvals` is not supplied
    """
    grid = kwargs.pop("xvals", None)
    if grid is None:  # pragma: no cover
        raise ValueError("To convert to extract_xy_vals you must specify xvals")
    return dict(xvals=grid, yvals=in_dist.pdf(grid))
def extract_xy_vals(in_dist, **kwargs):
    """Convert a distribution to per-pdf arrays of x and y values.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data; ``xvals`` is broadcast to the same shape as
        ``yvals`` so each pdf carries its own copy of the grid

    Raises
    ------
    ValueError
        If `xvals` is not supplied
    """
    grid = kwargs.pop("xvals", None)
    if grid is None:  # pragma: no cover
        raise ValueError("To convert using extract_xy_vals you must specify xvals")
    yvals = in_dist.pdf(grid)
    # Tile the (squeezed) grid out to one row per pdf.
    tiled_x = np.squeeze(grid) * np.ones(yvals.shape)
    return dict(xvals=tiled_x, yvals=yvals)
def extract_samples(in_dist, **kwargs):
    """Convert using a set of values sampled from the PDF

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    size : `int`
        Number of samples to generate (default 1000)
    xvals : `np.array`
        Grid carried along in the output for downstream evaluation

    Returns
    -------
    data : `dict`
        The extracted data; ``yvals`` is always `None` here

    Raises
    ------
    ValueError
        If `xvals` is not provided
    """
    samples = in_dist.rvs(size=kwargs.pop("size", 1000))
    xvals = kwargs.pop("xvals", None)
    if xvals is None:  # pragma: no cover
        # Consistent with the other extractors: a missing required kwarg
        # raises ValueError with a usable message instead of the bare
        # KeyError that kwargs.pop("xvals") used to produce.
        raise ValueError("To convert using extract_samples you must specify xvals")
    return dict(samples=samples, xvals=xvals, yvals=None)
def extract_hist_values(in_dist, **kwargs):
    """Convert by histogramizing the distributions onto a set of bin edges.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    bins : `np.array`
        Histogram bin edges

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `bins` is not supplied
    """
    edges = kwargs.pop("bins", None)
    if edges is None:  # pragma: no cover
        raise ValueError("To convert using extract_hist_samples you must specify bins")
    edges, pdfs = in_dist.histogramize(edges)
    return dict(bins=edges, pdfs=pdfs)
def extract_hist_samples(in_dist, **kwargs):
    """Convert using a set of values samples that are then histogramed

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    bins : `np.array`
        Histogram bin edges
    size : `int`
        Number of samples to generate (default 1000)

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `bins` is not supplied
    """
    bins = kwargs.pop("bins", None)
    size = kwargs.pop("size", 1000)
    if bins is None:  # pragma: no cover
        raise ValueError("To convert using extract_hist_samples you must specify bins")
    samples = in_dist.rvs(size=size)

    def hist_helper(sample):
        return np.histogram(sample, bins=bins)[0]

    # Use symbolic core-dimension labels.  The old signature was built as
    # "(%i)->(%i)" % (samples.shape[0], bins.size - 1): samples.shape[0] is
    # the number of distributions, not the per-distribution sample count, so
    # whenever npdf happened to equal bins.size - 1 the two labels collided
    # and np.vectorize demanded (incorrectly) that size == nbins.
    vv = np.vectorize(hist_helper, signature="(n)->(m)")
    pdfs = vv(samples)
    return dict(bins=bins, pdfs=pdfs)
def extract_quantiles(in_dist, **kwargs):
    """Convert using a set of quantiles and the locations at which they are reached

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    quants : `np.array`
        Quantile values to use

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `quants` is not supplied
    """
    quantile_grid = kwargs.pop("quants", None)
    if quantile_grid is None:  # pragma: no cover
        raise ValueError("To convert using extract_quantiles you must specify quants")
    return dict(quants=quantile_grid, locs=in_dist.ppf(quantile_grid))
def extract_fit(in_dist, **kwargs):  # pragma: no cover
    """Convert to a functional distribution by fitting it to a set of x and y values

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data

    Notes
    -----
    Not implemented yet.  The intended approach is to evaluate the pdf on
    ``xvals`` and fit the target functional form to those values.
    """
    raise NotImplementedError("extract_fit")
def extract_mixmod_fit_samples(in_dist, **kwargs):
    """Convert to a mixture model using a set of values sampled from the pdf

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    ncomps : `int`
        Number of components in mixture model to use (default 3)
    nsamples : `int`
        Number of samples to generate (default 1000)
    random_state : `int`
        Used to reproducibly generate random variate from in_dist

    Returns
    -------
    data : `dict`
        The extracted data: per-pdf arrays of mixture ``weights``, ``means``
        and ``stds``
    """
    n_comps = kwargs.pop("ncomps", 3)
    n_sample = kwargs.pop("nsamples", 1000)
    random_state = kwargs.pop("random_state", None)
    samples = in_dist.rvs(size=n_sample, random_state=random_state)

    def fit_one(samps):
        # Fit a single pdf's samples with a Gaussian mixture and stack the
        # fitted parameters as rows: (weights, means, stdevs).
        gmm = mixture.GaussianMixture(n_components=n_comps)
        gmm.fit(samps.reshape(-1, 1))
        return np.vstack(
            [gmm.weights_, gmm.means_[:, 0], np.sqrt(gmm.covariances_[:, 0, 0])]
        )

    # Map the fit over the leading (per-pdf) axis of the samples.
    vv = np.vectorize(fit_one, signature="(%i)->(3,%i)" % (n_sample, n_comps))
    fit_vals = vv(samples)
    return dict(
        weights=fit_vals[:, 0, :], means=fit_vals[:, 1, :], stds=fit_vals[:, 2, :]
    )
def extract_voigt_mixmod(in_dist, **kwargs):  # pragma: no cover
    """Convert to a voigt mixture model starting with a gaussian mixture model,
    trivially by setting gammas to 0

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Returns
    -------
    data : `dict`
        The extracted data
    """
    objdata = in_dist.objdata()
    # A Voigt profile with gamma == 0 reduces to a Gaussian, so the Gaussian
    # mixture parameters carry over unchanged and only gammas are added.
    return dict(
        means=objdata["means"],
        stds=objdata["stds"],
        weights=objdata["weights"],
        gammas=np.zeros_like(objdata["means"]),
        **kwargs,
    )
def extract_voigt_xy(in_dist, **kwargs):  # pragma: no cover
    """Build a voigt function basis and run a match-pursuit algorithm to fit gridded data

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Returns
    -------
    data : `dict`
        The extracted data as shaped parameters means, stds, weights, gammas
    """
    # Delegate to the sparse extractor, then expand the sparse indices back
    # into per-component shape parameters.
    sparse = extract_voigt_xy_sparse(in_dist, **kwargs)
    weights, means, stds, gammas = indices2shapes(
        sparse["indices"], sparse["metadata"]
    )
    return dict(means=means, stds=stds, weights=weights, gammas=gammas)
def extract_voigt_xy_sparse(in_dist, **kwargs):  # pragma: no cover
    """Build a voigt function basis and run a match-pursuit algorithm to fit gridded data

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : array-like
        Grid the input pdfs are defined on (defaults to the ensemble metadata)
    nz : int
        Number of points in the rebinned grid (default 300)

    Returns
    -------
    data : `dict`
        The extracted data as sparse indices and the metadata needed to
        rebuild the basis
    """
    yvals = in_dist.objdata()["yvals"]
    default = in_dist.metadata()["xvals"][0]
    z = kwargs.pop("xvals", default)
    nz = kwargs.pop("nz", 300)
    # Rebin onto a grid covering only the support of the pdfs (trailing
    # y == 0 columns are dropped).
    minz = np.min(z)
    _, j = np.where(yvals > 0)
    maxz = np.max(z[j])
    newz = np.linspace(minz, maxz, nz)
    interp = sciinterp.interp1d(z, yvals, assume_sorted=True)
    newpdf = interp(newz)
    # Renormalize on the new grid.  `trapezoid` replaces `trapz`, which was
    # removed in SciPy 1.14 (trapezoid exists since SciPy 1.6).
    newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1)
    ALL, bigD, _ = build_sparse_representation(newz, newpdf)
    return dict(indices=ALL, metadata=bigD)
def extract_sparse_from_xy(in_dist, **kwargs):  # pragma: no cover
    """Extract sparse representation from an xy interpolated representation

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    yvals : array-like
        Used to override the y-values
    xvals : array-like
        Used to override the x-values
    nvals : int
        Used to override the number of bins

    Returns
    -------
    metadata : `dict`
        Dictionary with data for sparse representation

    Notes
    -----
    This function will rebin to a grid more suited to the in_dist support by
    removing x-values corresponding to y=0
    """
    yvals = kwargs.pop("yvals", in_dist.objdata()["yvals"])
    xvals = kwargs.pop("xvals", in_dist.metadata()["xvals"][0])
    nvals = kwargs.pop("nvals", 300)
    # Rebin onto a grid spanning only the support of the pdfs.
    _, support_cols = np.where(yvals > 0)
    newx = np.linspace(np.min(xvals), np.max(xvals[support_cols]), nvals)
    newpdf = sciinterp.interp1d(xvals, yvals, assume_sorted=True)(newx)
    sparse_indices, metadata, _ = build_sparse_representation(newx, newpdf)
    metadata["xvals"] = newx
    metadata["sparse_indices"] = sparse_indices
    # Ntot is redundant for the stored representation.
    metadata.pop("Ntot")
    return metadata
def extract_xy_sparse(in_dist, **kwargs):  # pragma: no cover
    """Extract xy-interpolated representation from a sparse representation

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : array-like
        Used to override the x-values
    nvals : int
        Used to override the number of bins

    Returns
    -------
    metadata : `dict`
        Dictionary with data for interpolated representation

    Notes
    -----
    This function will rebin to a grid more suited to the in_dist support by
    removing x-values corresponding to y=0
    """
    yvals = in_dist.objdata()["yvals"]
    default = in_dist.metadata()["xvals"][0]
    xvals = kwargs.pop("xvals", default)
    nvals = kwargs.pop("nvals", 300)
    # rebin to a grid more suited to the in_dist support
    xmin = np.min(xvals)
    _, j = np.where(yvals > 0)
    xmax = np.max(xvals[j])
    newx = np.linspace(xmin, xmax, nvals)
    interp = sciinterp.interp1d(xvals, yvals, assume_sorted=True)
    newpdf = interp(newx)
    sparse_indices, sparse_meta, A = build_sparse_representation(newx, newpdf)
    # decode the sparse indices into basis indices and weights
    basis_indices, weights = decode_sparse_indices(sparse_indices)
    # retrieve the weighted array of basis functions for each object
    pdf_y = A[:, basis_indices] * weights
    # normalize and sum the weighted pdfs
    x = sparse_meta["z"]
    y = pdf_y.sum(axis=-1)
    # `trapezoid` replaces `trapz`, which was removed in SciPy 1.14
    # (trapezoid exists since SciPy 1.6).
    norms = sciint.trapezoid(y.T, x)
    y /= norms
    return dict(xvals=x, yvals=y.T, **kwargs)