# Source code for qp.conversion_funcs
"""This module implements functions to convert distributions between various representations
These functions should then be registered with the `qp.ConversionDict` using `qp_add_mapping`.
That will allow the automated conversion mechanisms to work.
"""
import numpy as np
from scipy import integrate as sciint
from scipy import interpolate as sciinterp
from .lazy_modules import mixture
from .sparse_rep import (
build_sparse_representation,
decode_sparse_indices,
indices2shapes,
)
def extract_vals_at_x(in_dist, **kwargs):
    """Convert a distribution to a grid of x locations and the pdf values there.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `xvals` is not supplied
    """
    grid = kwargs.pop("xvals", None)
    if grid is None:  # pragma: no cover
        raise ValueError("To convert to extract_xy_vals you must specify xvals")
    return dict(xvals=grid, yvals=in_dist.pdf(grid))
def extract_xy_vals(in_dist, **kwargs):
    """Convert a distribution to per-pdf arrays of x and y values.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data; ``xvals`` is broadcast to the same shape as
        ``yvals`` so each pdf carries its own copy of the grid

    Raises
    ------
    ValueError
        If `xvals` is not supplied
    """
    grid = kwargs.pop("xvals", None)
    if grid is None:  # pragma: no cover
        raise ValueError("To convert using extract_xy_vals you must specify xvals")
    yvals = in_dist.pdf(grid)
    # Tile the (squeezed) grid out to one row per pdf.
    tiled_x = np.squeeze(grid) * np.ones(yvals.shape)
    return dict(xvals=tiled_x, yvals=yvals)
def extract_samples(in_dist, **kwargs):
    """Convert using a set of values sampled from the PDF

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    size : `int`
        Number of samples to generate (default 1000)
    xvals : `np.array`
        Grid carried along in the output for downstream evaluation

    Returns
    -------
    data : `dict`
        The extracted data; ``yvals`` is always `None` here

    Raises
    ------
    ValueError
        If `xvals` is not provided
    """
    samples = in_dist.rvs(size=kwargs.pop("size", 1000))
    xvals = kwargs.pop("xvals", None)
    if xvals is None:  # pragma: no cover
        # Consistent with the other extractors: a missing required kwarg
        # raises ValueError with a usable message instead of the bare
        # KeyError that kwargs.pop("xvals") used to produce.
        raise ValueError("To convert using extract_samples you must specify xvals")
    return dict(samples=samples, xvals=xvals, yvals=None)
def extract_hist_values(in_dist, **kwargs):
    """Convert by histogramizing the distributions onto a set of bin edges.

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    bins : `np.array`
        Histogram bin edges

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `bins` is not supplied
    """
    edges = kwargs.pop("bins", None)
    if edges is None:  # pragma: no cover
        raise ValueError("To convert using extract_hist_samples you must specify bins")
    edges, pdfs = in_dist.histogramize(edges)
    return dict(bins=edges, pdfs=pdfs)
def extract_hist_samples(in_dist, **kwargs):
    """Convert using a set of values samples that are then histogramed

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    bins : `np.array`
        Histogram bin edges
    size : `int`
        Number of samples to generate (default 1000)

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `bins` is not supplied
    """
    bins = kwargs.pop("bins", None)
    size = kwargs.pop("size", 1000)
    if bins is None:  # pragma: no cover
        raise ValueError("To convert using extract_hist_samples you must specify bins")
    samples = in_dist.rvs(size=size)

    def hist_helper(sample):
        return np.histogram(sample, bins=bins)[0]

    # Use symbolic core-dimension labels.  The old signature was built as
    # "(%i)->(%i)" % (samples.shape[0], bins.size - 1): samples.shape[0] is
    # the number of distributions, not the per-distribution sample count, so
    # whenever npdf happened to equal bins.size - 1 the two labels collided
    # and np.vectorize demanded (incorrectly) that size == nbins.
    vv = np.vectorize(hist_helper, signature="(n)->(m)")
    pdfs = vv(samples)
    return dict(bins=bins, pdfs=pdfs)
def extract_quantiles(in_dist, **kwargs):
    """Convert using a set of quantiles and the locations at which they are reached

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    quants : `np.array`
        Quantile values to use

    Returns
    -------
    data : `dict`
        The extracted data

    Raises
    ------
    ValueError
        If `quants` is not supplied
    """
    quantile_grid = kwargs.pop("quants", None)
    if quantile_grid is None:  # pragma: no cover
        raise ValueError("To convert using extract_quantiles you must specify quants")
    return dict(quants=quantile_grid, locs=in_dist.ppf(quantile_grid))
def extract_fit(in_dist, **kwargs):  # pragma: no cover
    """Convert to a functional distribution by fitting it to a set of x and y values

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : `np.array`
        Locations at which the pdf is evaluated

    Returns
    -------
    data : `dict`
        The extracted data

    Notes
    -----
    Not implemented yet.  The intended approach is to evaluate the pdf on
    ``xvals`` and fit the target functional form to those values.
    """
    raise NotImplementedError("extract_fit")
def extract_mixmod_fit_samples(in_dist, **kwargs):
    """Convert to a mixture model using a set of values sampled from the pdf

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    ncomps : `int`
        Number of components in mixture model to use (default 3)
    nsamples : `int`
        Number of samples to generate (default 1000)
    random_state : `int`
        Used to reproducibly generate random variate from in_dist

    Returns
    -------
    data : `dict`
        The extracted data: per-pdf arrays of mixture ``weights``, ``means``
        and ``stds``
    """
    n_comps = kwargs.pop("ncomps", 3)
    n_sample = kwargs.pop("nsamples", 1000)
    random_state = kwargs.pop("random_state", None)
    samples = in_dist.rvs(size=n_sample, random_state=random_state)

    def fit_one(samps):
        # Fit a single pdf's samples with a Gaussian mixture and stack the
        # fitted parameters as rows: (weights, means, stdevs).
        gmm = mixture.GaussianMixture(n_components=n_comps)
        gmm.fit(samps.reshape(-1, 1))
        return np.vstack(
            [gmm.weights_, gmm.means_[:, 0], np.sqrt(gmm.covariances_[:, 0, 0])]
        )

    # Map the fit over the leading (per-pdf) axis of the samples.
    vv = np.vectorize(fit_one, signature="(%i)->(3,%i)" % (n_sample, n_comps))
    fit_vals = vv(samples)
    return dict(
        weights=fit_vals[:, 0, :], means=fit_vals[:, 1, :], stds=fit_vals[:, 2, :]
    )
def extract_voigt_mixmod(in_dist, **kwargs):  # pragma: no cover
    """Convert to a voigt mixture model starting with a gaussian mixture model,
    trivially by setting gammas to 0

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Returns
    -------
    data : `dict`
        The extracted data
    """
    objdata = in_dist.objdata()
    # A Voigt profile with gamma == 0 reduces to a Gaussian, so the Gaussian
    # mixture parameters carry over unchanged and only gammas are added.
    return dict(
        means=objdata["means"],
        stds=objdata["stds"],
        weights=objdata["weights"],
        gammas=np.zeros_like(objdata["means"]),
        **kwargs,
    )
def extract_voigt_xy(in_dist, **kwargs):  # pragma: no cover
    """Build a voigt function basis and run a match-pursuit algorithm to fit gridded data

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Returns
    -------
    data : `dict`
        The extracted data as shaped parameters means, stds, weights, gammas
    """
    # Delegate to the sparse extractor, then expand the sparse indices back
    # into per-component shape parameters.
    sparse = extract_voigt_xy_sparse(in_dist, **kwargs)
    weights, means, stds, gammas = indices2shapes(
        sparse["indices"], sparse["metadata"]
    )
    return dict(means=means, stds=stds, weights=weights, gammas=gammas)
def extract_voigt_xy_sparse(in_dist, **kwargs):  # pragma: no cover
    """Build a voigt function basis and run a match-pursuit algorithm to fit gridded data

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : array-like
        Grid the input pdfs are defined on (defaults to the ensemble metadata)
    nz : int
        Number of points in the rebinned grid (default 300)

    Returns
    -------
    data : `dict`
        The extracted data as sparse indices and the metadata needed to
        rebuild the basis
    """
    yvals = in_dist.objdata()["yvals"]
    default = in_dist.metadata()["xvals"][0]
    z = kwargs.pop("xvals", default)
    nz = kwargs.pop("nz", 300)
    # Rebin onto a grid covering only the support of the pdfs (trailing
    # y == 0 columns are dropped).
    minz = np.min(z)
    _, j = np.where(yvals > 0)
    maxz = np.max(z[j])
    newz = np.linspace(minz, maxz, nz)
    interp = sciinterp.interp1d(z, yvals, assume_sorted=True)
    newpdf = interp(newz)
    # Renormalize on the new grid.  `trapezoid` replaces `trapz`, which was
    # removed in SciPy 1.14 (trapezoid exists since SciPy 1.6).
    newpdf = newpdf / sciint.trapezoid(newpdf, newz).reshape(-1, 1)
    ALL, bigD, _ = build_sparse_representation(newz, newpdf)
    return dict(indices=ALL, metadata=bigD)
def extract_sparse_from_xy(in_dist, **kwargs):  # pragma: no cover
    """Extract sparse representation from an xy interpolated representation

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    yvals : array-like
        Used to override the y-values
    xvals : array-like
        Used to override the x-values
    nvals : int
        Used to override the number of bins

    Returns
    -------
    metadata : `dict`
        Dictionary with data for sparse representation

    Notes
    -----
    This function will rebin to a grid more suited to the in_dist support by
    removing x-values corresponding to y=0
    """
    yvals = kwargs.pop("yvals", in_dist.objdata()["yvals"])
    xvals = kwargs.pop("xvals", in_dist.metadata()["xvals"][0])
    nvals = kwargs.pop("nvals", 300)
    # Rebin onto a grid spanning only the support of the pdfs.
    _, support_cols = np.where(yvals > 0)
    newx = np.linspace(np.min(xvals), np.max(xvals[support_cols]), nvals)
    newpdf = sciinterp.interp1d(xvals, yvals, assume_sorted=True)(newx)
    sparse_indices, metadata, _ = build_sparse_representation(newx, newpdf)
    metadata["xvals"] = newx
    metadata["sparse_indices"] = sparse_indices
    # Ntot is redundant for the stored representation.
    metadata.pop("Ntot")
    return metadata
def extract_xy_sparse(in_dist, **kwargs):  # pragma: no cover
    """Extract xy-interpolated representation from a sparse representation

    Parameters
    ----------
    in_dist : `qp.Ensemble`
        Input distributions

    Other Parameters
    ----------------
    xvals : array-like
        Used to override the x-values
    nvals : int
        Used to override the number of bins

    Returns
    -------
    metadata : `dict`
        Dictionary with data for interpolated representation

    Notes
    -----
    This function will rebin to a grid more suited to the in_dist support by
    removing x-values corresponding to y=0
    """
    yvals = in_dist.objdata()["yvals"]
    default = in_dist.metadata()["xvals"][0]
    xvals = kwargs.pop("xvals", default)
    nvals = kwargs.pop("nvals", 300)
    # rebin to a grid more suited to the in_dist support
    xmin = np.min(xvals)
    _, j = np.where(yvals > 0)
    xmax = np.max(xvals[j])
    newx = np.linspace(xmin, xmax, nvals)
    interp = sciinterp.interp1d(xvals, yvals, assume_sorted=True)
    newpdf = interp(newx)
    sparse_indices, sparse_meta, A = build_sparse_representation(newx, newpdf)
    # decode the sparse indices into basis indices and weights
    basis_indices, weights = decode_sparse_indices(sparse_indices)
    # retrieve the weighted array of basis functions for each object
    pdf_y = A[:, basis_indices] * weights
    # normalize and sum the weighted pdfs
    x = sparse_meta["z"]
    y = pdf_y.sum(axis=-1)
    # `trapezoid` replaces `trapz`, which was removed in SciPy 1.14
    # (trapezoid exists since SciPy 1.6).
    norms = sciint.trapezoid(y.T, x)
    y /= norms
    return dict(xvals=x, yvals=y.T, **kwargs)