"""Implementation of an Ensemble of distributions."""
from __future__ import annotations
import os
from typing import Mapping, Optional, Union
import json
import h5py
import numpy as np
import tables_io
from tables_io import hdf5
from typing import Mapping, Optional, Union
from numpy.typing import ArrayLike
from ..utils.dictionary import (
check_array_shapes,
compare_dicts,
concatenate_dicts,
slice_dict,
reduce_arrays_to_1d,
make_len_equal,
expand_dimensions,
)
from ..utils.array import encode_strings, reduce_dimensions
from ..metrics import quick_moment
from ..parameterizations.base import Pdf_gen
# import psutil
# import timeit
[docs]
class Ensemble:
"""An object comprised of one or more distributions with the same parameterization.
The Ensemble allows you to perform operations on the group of parameterizations as a whole.
An Ensemble has three main data components, the last of which is optional:
1. The metadata: this contains information about the parameterization, and
the coordinates of the parameterization.
2. The object data: this contains the data that is unique to each distribution,
for example the values that correspond to the coordinates.
3. The ancillary data (optional): this contains data points where there is one data point
for each distribution in the ensemble. There can be many of these columns or
arrays in the ancillary data table.
Parameters
----------
the_class : Pdf_gen subclass
The class to use to parameterize the distributions
data : Mapping
Dictionary with data used to construct the ensemble. The keys required
vary for different parameterizations.
ancil : Optional[Mapping]
Dictionary with ancillary data, by default None
method : Optional[str]
The key for the creation method to use, by default None
Examples
--------
>>> import qp
>>> import numpy as np
>>> data = {'bins': [0,1,2,3,4,5],
... 'pdfs': np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]])}
>>> ancil = {'ids': [105, 108]}
>>> ens = qp.Ensemble(qp.hist,data,ancil)
>>> ens.metadata
{'pdf_name': array([b'hist'], dtype='|S4'),
'pdf_version': array([0]),
'bins': array([[0, 1, 2, 3, 4, 5]])}
"""
def __init__(
    self,
    the_class: Pdf_gen,
    data: Mapping,
    ancil: Optional[Mapping] = None,
    method: Optional[str] = None,
):
    """Build an ensemble from a parameterization class and its data.

    The creation function is looked up on ``the_class`` via
    ``creation_method(method)`` and called with ``data`` unpacked as keyword
    arguments. Which keys ``data`` needs depends on the parameterization; the
    docstring of ``qp.[parameterization].create_ensemble`` describes them
    (and that function can be used to build an ensemble directly).

    ``ancil`` may hold per-distribution arrays: each array should have one
    entry (or one row) per distribution in the ensemble.

    Parameters
    ----------
    the_class : Pdf_gen subclass
        The class used to parameterize the distributions.
    data : Mapping
        Data used to construct the ensemble; required keys vary by
        parameterization.
    ancil : Optional[Mapping]
        Ancillary data, by default None.
    method : Optional[str]
        Key of the creation method to use, by default None.
    """
    # Resolve the creation function first; the frozen object is built from it.
    creator = the_class.creation_method(method)
    self._gen_func = creator

    frozen_dist = creator(**data)
    self._frozen = frozen_dist

    # Read `dist` once and derive the generator class from that same object.
    generator = frozen_dist.dist
    self._gen_obj = generator
    self._gen_class = type(generator)

    # Make sure the attribute exists before `set_ancil` runs its checks.
    self._ancil = None
    self.set_ancil(ancil)

    # `_gridded` is filled lazily by `gridded`; `_samples` starts empty too.
    self._gridded = None
    self._samples = None
def __repr__(self) -> str:
    """Return a short description: own class name, parameterization name and shape."""
    own_name = type(self).__name__
    param_name = self._gen_class.name
    return f"{own_name}(the_class={param_name},shape={self.shape})"
def __len__(self) -> int:
    """Return the number of distributions, i.e. the value of ``npdf``."""
    count = self.npdf
    return count
def __getitem__(self, key: Union[int, slice]) -> Ensemble:
    """Build a new Ensemble holding a subset of this ensemble's distributions.

    Parameters
    ----------
    key : Union [int , slice]
        Index or slice selecting the distribution(s) to keep.

    Returns
    -------
    ens : Ensemble
        The ensemble for the requested distribution or slice of distributions.

    Raises
    ------
    IndexError
        If the ensemble does not hold more than one distribution and ``key``
        is not ``0`` on a single-distribution ensemble.
    """
    # Shared coordinates: drop the two bookkeeping entries and squeeze the
    # rest before handing them back to the constructor as data.
    shared = self.metadata
    for bookkeeping in ("pdf_name", "pdf_version"):
        shared.pop(bookkeeping)
    subset = {name: np.squeeze(value) for name, value in shared.items()}

    count = self.npdf
    if count > 1:
        picked = slice_dict(self.objdata, key)
    elif count == 1 and key == 0:
        picked = self.objdata
    else:
        raise IndexError(
            f"Cannot slice Ensemble object with {self.npdf} with given index/slice {key}."
        )

    # Per-distribution arrays are passed on with at least two dimensions.
    for name, value in picked.items():
        subset[name] = np.expand_dims(value, 0) if np.ndim(value) < 2 else value

    # Ancillary data is only sliced when there is more than one distribution;
    # a single-distribution ensemble passes its table through unchanged.
    ancil = self._ancil
    if ancil is not None and count > 1:
        ancil = slice_dict(ancil, key)

    return Ensemble(self._gen_obj, data=subset, ancil=ancil)
@property
def gen_func(self):
    """The creation function that was used to build this ensemble's distribution object."""
    creator = self._gen_func
    return creator
@property
def gen_class(self):
    """The class of the generator object behind this ensemble's distributions."""
    klass = self._gen_class
    return klass
@property
def dist(self):
    """The `scipy.stats.rv_continuous` generator object for this ensemble (same object as `gen_obj`)."""
    generator = self._gen_obj
    return generator
@property
def kwds(self):
    """The ``kwds`` attribute of the frozen object held by this ensemble."""
    frozen = self._frozen
    return frozen.kwds
@property
def gen_obj(self):
    """The `scipy.stats.rv_continuous` generator object for this ensemble (same object as `dist`)."""
    generator = self._gen_obj
    return generator
@property
def frozen(self):
    """The `scipy.stats.rv_frozen` object that encapsulates this ensemble's distributions."""
    frozen_dist = self._frozen
    return frozen_dist
@property
def ndim(self) -> int:
    """Number of dimensions of the distributions, as reported by the frozen object."""
    frozen = self._frozen
    return frozen.ndim
@property
def shape(self) -> tuple:
    """Shape of the distributions, as reported by the frozen object."""
    frozen = self._frozen
    return frozen.shape
@property
def npdf(self) -> int:
    """Number of distributions in this ensemble, as reported by the frozen object."""
    frozen = self._frozen
    return frozen.npdf
@property
def ancil(self) -> Mapping:
    """The ancillary data dictionary of this ensemble (``None`` when none has been set)."""
    table = self._ancil
    return table
[docs]
def x_samples(
    self, min: float = 0.0, max: float = 5.0, n: Optional[int] = 1000
) -> np.ndarray[float]:
    """Return an array of x values that can be used to plot all the distributions
    in the Ensemble.

    This is meant to plot the characteristic distribution for an Ensemble of
    discrete data. For example, for an ensemble of histograms that would be
    the PDF, and for an ensemble of quantiles that would be the CDF.

    If the generator object provides a working ``x_samples()`` method its
    result is returned. Otherwise (the method is missing, or calling it fails
    with an ordinary exception) the fallback is
    `np.linspace(min,max,n) <numpy.linspace>`. Analytic parameterizations like
    `mixmod <qp.mixmod_gen>` or `scipy.stats.norm` use the fallback, so it is
    recommended to pass explicit values as the defaults are the same for all
    of them.

    Parameters
    ----------
    min : float, optional
        The minimum x value used by the fallback grid, by default 0.
    max : float, optional
        The maximum x value used by the fallback grid, by default 5.
    n : Optional[int], optional
        The number of points used by the fallback grid, by default 1000

    Returns
    -------
    xs : np.ndarray[float]
        The array of points to use.
    """
    try:
        return self._frozen.dist.x_samples()
    except Exception:
        # Best-effort fallback, but only for ordinary errors: a bare `except`
        # would also swallow KeyboardInterrupt and SystemExit.
        return np.linspace(min, max, n)
[docs]
def convert_to(self, to_class: Pdf_gen, **kwargs: str) -> Ensemble:
    """Convert this ensemble to another parameterization class.

    The available conversion methods and their required arguments are
    described in the docstrings of the target class (``qp.to_class``). A
    class without a conversion methods table cannot be converted to.

    Parameters
    ----------
    to_class : Pdf_gen subclass
        Parameterization class to convert to
    **kwargs :
        Keyword arguments that are passed on to the extraction function

    Other Parameters
    ----------------
    method : str
        Optional argument to specify a non-default conversion algorithm.
        It is removed from ``kwargs`` before they are forwarded.

    Returns
    -------
    ens : Ensemble
        Ensemble of distributions of type ``to_class`` built from this object

    Raises
    ------
    KeyError
        If ``to_class`` has no creation or extraction method for ``method``.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_i = ens_h.convert_to(qp.interp, xvals=np.linspace(0,5,10))
    >>> ens_i.metadata
    {'pdf_name': array([b'interp'], dtype='|S6'),
    'pdf_version': array([0]),
    'xvals': array([0. , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
    2.77777778, 3.33333333, 3.88888889, 4.44444444, 5. ]))}
    """
    # Work on a copy so the caller's keyword dict is never modified.
    options = dict(kwargs)
    method = options.pop("method", None)

    ctor_func = to_class.creation_method(method)
    class_name = to_class.name
    if ctor_func is None:  # pragma: no cover
        raise KeyError(
            "Class named %s does not have a creation_method named %s"
            % (class_name, method)
        )

    extract_func = to_class.extraction_method(method)
    if extract_func is None:  # pragma: no cover
        raise KeyError(
            "Class named %s does not have a extraction_method named %s"
            % (class_name, method)
        )

    converted = extract_func(self, **options)
    return Ensemble(to_class, data=converted, method=method)
[docs]
def update(self, data: Mapping, ancil: Optional[Mapping] = None) -> None:
    """Rebuild the frozen distribution object from ``data`` and replace the
    ancillary data table with ``ancil``.

    The stored creation function is called again with ``data``, so ``data``
    must contain everything needed to construct the ensemble, including the
    metadata entries. Cached grid values and samples are discarded.

    Parameters
    ----------
    data : Mapping
        Dictionary with data used to construct the ensemble, including metadata.
    ancil : Optional[Mapping], optional
        Optional dictionary that contains data for each of the distributions
        in the ensemble, by default None.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([0,0.1,0.1,0.4,0.2]))
    >>> ens_h.update(data={'bins': np.array([1,2,3,4,5]), 'pdfs': np.array([0.1,0.1,0.4,0.2])})
    >>> ens_h.metadata
    {'pdf_name': array([b'hist'], dtype='|S4'),
    'pdf_version': array([0]),
    'bins': array([[1, 2, 3, 4, 5]])}
    """
    rebuilt = self._gen_func(**data)
    self._frozen = rebuilt
    self._gen_obj = rebuilt.dist
    self.set_ancil(ancil)
    # Anything cached for the previous distributions is no longer valid.
    self._gridded = self._samples = None
[docs]
def update_objdata(self, data: Mapping, ancil: Optional[Mapping] = None) -> None:
    """Replace (part of) the object data while keeping the current metadata,
    and set the ancillary data table to ``ancil``.

    The construction data handed to `update` is assembled in three layers,
    later ones overriding earlier ones: the squeezed metadata (without
    ``pdf_name`` and ``pdf_version``), the current object data, and ``data``.

    Parameters
    ----------
    data : Mapping
        Dictionary with the object data that will be used to reconstruct the ensemble
    ancil : Optional[Mapping], optional
        Optional dictionary that contains data for each of the distributions
        in the ensemble, by default None.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([0,0.1,0.1,0.4,0.2]))
    >>> ens_h.objdata
    {'pdfs': array([0. , 0.125, 0.125, 0.5 , 0.25 ])}
    >>> ens_h.update_objdata(data={'pdfs': np.array([0.05,0.09,0.2,0.3,0.15])})
    >>> ens_h.objdata
    {'pdfs': array([[0.06329114, 0.11392405, 0.25316456, 0.37974684, 0.18987342]])}
    """
    bookkeeping = ("pdf_name", "pdf_version")
    merged = {
        name: np.squeeze(value)
        for name, value in self.metadata.items()
        if name not in bookkeeping
    }
    merged.update(self.objdata)
    merged.update(data)
    self.update(merged, ancil)
@property
def metadata(self) -> Mapping:
    """Return the metadata for this ensemble.

    Metadata are the elements that are the same for all the distributions in
    the ensemble. These include the name and version of the distribution
    generation class.

    Returns
    -------
    metadata : Mapping
        A new dictionary holding the generator object's metadata entries, so
        adding or removing keys does not affect the ensemble.
    """
    return dict(self._gen_obj.metadata)
@property
def objdata(self) -> Mapping:
    """Return the data for this ensemble.

    These are the elements that differ for each distribution in the ensemble,
    for example the data points that correspond to each of the coordinates
    given in the metadata.

    Returns
    -------
    objdata : Mapping
        The object data

    Notes
    -----
    If the distribution normalized the data (which many do by default), this
    will return the normalized data and not the original input data.
    """
    # Start from the frozen object's keywords, without the "row" entry, then
    # let the generator object's own object data take precedence.
    per_dist = dict(self._frozen.kwds)
    per_dist.pop("row", None)
    per_dist.update(self._gen_obj.objdata)

    if self.npdf != 1:
        return per_dist
    # With a single distribution the arrays are reshaped as necessary.
    return reduce_arrays_to_1d(per_dist)
[docs]
def set_ancil(self, ancil: Mapping) -> None:
    """Replace the ancillary data dictionary.

    Every array in ``ancil`` must have one row per distribution, i.e. its
    length (first dimension) must equal the number of distributions in the
    ensemble. The dictionary is validated with ``check_array_shapes`` against
    ``npdf`` and then stored as given, without copying.

    Parameters
    ----------
    ancil : Mapping
        The ancillary data dictionary.

    Raises
    ------
    IndexError
        If the length of the arrays in ancil does not match the number of
        distributions in the Ensemble.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ancil = {'ids': np.array([5,7])}
    >>> ens_h.set_ancil(ancil)
    >>> ens_h.ancil
    {'ids': array([5, 7])}
    """
    expected_rows = self.npdf
    check_array_shapes(ancil, expected_rows)
    self._ancil = ancil
[docs]
def add_to_ancil(self, to_add: Mapping) -> None:  # pragma: no cover
    """Add columns to the existing ancillary data dictionary.

    The ancil dictionary must already exist; if it does not, use `set_ancil`.
    A new column with the same name as an existing one overwrites it. The
    existing dictionary is updated in place.

    Parameters
    ----------
    to_add : Mapping
        The columns to add to the ancillary data dict

    Raises
    ------
    IndexError
        If the length of the arrays in to_add does not match the number of
        distributions in the Ensembles

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ancil = {'ids': np.array([5,7])}
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]), ancil=ancil)
    >>> ens_h.add_to_ancil({'means':np.array([0.2,0.25])})
    >>> ens_h.ancil
    {'ids': array([5, 7]), 'means': array([0.2 , 0.25])}
    """
    expected_rows = self.npdf
    check_array_shapes(to_add, expected_rows)
    self._ancil.update(to_add)
[docs]
def append(self, other_ens: Ensemble) -> None:
    """Append the distributions of another ensemble to this one, in place.

    Both ensembles must use the same parameterization and have the same
    metadata; two histogram ensembles, for example, must share their bins.

    The ancillary data tables are concatenated only when both ensembles have
    one. If either ensemble has no ancillary data, the ancillary data of the
    result is set to `None`.

    Parameters
    ----------
    other_ens : Ensemble
        The ensemble to append to this one.

    Raises
    ------
    KeyError
        Raised if the two ensembles do not have matching metadata.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_1 = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([0,0.1,0.1,0.4,0.2]))
    >>> ens_2 = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([0.5,0.15,0.25,0.45,0.1]))
    >>> ens_1.append(ens_2)
    >>> ens_1.npdf
    2
    """
    same_metadata = compare_dicts([self.metadata, other_ens.metadata])
    if not same_metadata:  # pragma: no cover
        raise KeyError("Metadata does not match, can not append")

    combined = concatenate_dicts([self.objdata, other_ens.objdata])

    combined_ancil = None
    if self._ancil is not None and other_ens.ancil is not None:  # pragma: no cover
        combined_ancil = concatenate_dicts([self.ancil, other_ens.ancil])

    self.update_objdata(combined, combined_ancil)
[docs]
def build_tables(self, encode: bool = False, ext: Optional[str] = None) -> Mapping:
    """Returns a dictionary of dictionaries of numpy arrays for the meta data,
    object data, and the ancillary data (if it exists) for this ensemble.

    Parameters
    ----------
    encode : bool
        If true and `ext` is 'hdf5', will encode any string columns in the `ancil` table,
        by default False.
    ext : str, optional
        If set to 'hdf5' when `encode` is true, will encode any string columns
        in the `ancil` table, by default None.

    Returns
    -------
    data : Mapping, `tables_io.TableDict-like`
        The dictionary with the data. Has the keys: ``meta`` for metadata, ``data``
        for object data, and optionally ``ancil`` for ancillary data.
    """
    tables = {"meta": make_len_equal(self.metadata), "data": self.objdata}

    # expand out the objdata to 2D arrays if there's only 1 distribution
    if self.npdf == 1:
        tables["data"] = expand_dimensions(tables["data"], self.npdf, self.shape[1])

    ancil = self.ancil
    if ancil is not None:
        # encode any string columns if the file will be hdf5; `encode` is
        # tested for truthiness rather than compared to True with `==`
        if encode and ext == "hdf5":
            tables["ancil"] = encode_strings(ancil)
        else:
            tables["ancil"] = ancil
    return tables
[docs]
def norm(self):
    """Normalizes the input distribution data if it represents a PDF
    and can be normalized.

    The generator object's ``normalize()`` result is merged with the current
    object data (entries that ``normalize()`` did not return are kept as they
    are) and the ensemble is rebuilt from it with `update_objdata`, keeping
    the current ancillary data.

    Raises
    ------
    AttributeError
        Raised if the parameterization doesn't have a normalization method.
    RuntimeError
        Propagated unchanged if ``normalize()`` raises it.
    """
    # get normalized data values
    try:
        normed = self._gen_obj.normalize()
    except AttributeError as err:
        raise AttributeError(
            "This parameterization does not have a normalization function."
        ) from err

    # `objdata` is a computed property, so read it once instead of once per key.
    current = self.objdata
    # add in any data from current ensemble which was unchanged
    for key, value in current.items():
        if key not in normed:
            normed[key] = value

    self.update_objdata(data=normed, ancil=self.ancil)
[docs]
def mode(self, grid: ArrayLike) -> ArrayLike:
    """Return the mode of each ensemble distribution, evaluated on the given grid.

    The mode is taken as the grid point at which the gridded PDF values are
    largest, so its resolution is limited by ``grid``.

    Parameters
    ----------
    grid : ArrayLike
        Grid on which to evaluate distribution

    Returns
    -------
    mode : ArrayLike
        The modes of the distributions evaluated on grid, with shape (npdf, 1)
    """
    new_grid, griddata = self.gridded(grid)
    # `gridded` hands the grid back as it was given; convert it so that lists
    # and tuples can be indexed with an array of positions.
    points = np.asarray(new_grid)
    # The PDF values may come back 1-D (e.g. when `pdf` reduces dimensions for
    # a single distribution); treat that as one row so axis=1 is always valid.
    values = np.atleast_2d(griddata)
    return np.expand_dims(points[np.argmax(values, axis=1)], -1)
[docs]
def gridded(self, grid: ArrayLike) -> tuple[ArrayLike, ArrayLike]:
    """Return the PDF values at the given grid points, using a one-entry cache.

    If ``grid`` is equal (per `numpy.array_equal`) to the grid of the cached
    result, the cached tuple is returned. Otherwise the PDF is evaluated on
    ``grid`` and the new result replaces the cache.

    Parameters
    ----------
    grid : ArrayLike
        The grid points to evaluate the PDF at.

    Returns
    -------
    gridded : tuple [ ArrayLike, ArrayLike ]
        (grid, pdf_values)
    """
    cached = self._gridded
    if cached is not None and np.array_equal(cached[0], grid):
        return cached

    self._gridded = (grid, self.pdf(grid))
    return self._gridded
[docs]
def write_to(self, filename: str) -> None:
    """Write this ensemble to a file.

    The file type can be any of those supported by tables_io and is taken
    from the suffix of ``filename``. Allowed formats are:
    'hdf5','h5','hf5','hd5','fits','fit','pq','parq','parquet'

    If writing to parquet files, a file will be written for the metadata,
    the object data, and the ancillary data if it exists, where the identifying
    key is added to the filename.

    Parameters
    ----------
    filename : str
        Name of the output file, including the suffix that selects the format.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_1 = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([0,0.1,0.1,0.4,0.2]))
    >>> ens_1.write_to("hist-ensemble.hdf5")
    """
    basename, dotted_ext = os.path.splitext(filename)
    # Drop the leading dot of the suffix to get the format name.
    file_format = dotted_ext[1:]
    tables = self.build_tables(encode=True, ext=file_format)
    tables_io.write(tables, basename, file_format)
[docs]
def to_json(self) -> dict[str, str]:
    """Convert this ensemble's tables to JSON via ``tables_io``.

    The tables from `build_tables` are passed to ``tables_io.convert`` with
    the ``JSON_STRING`` target type, and the result of that call is returned.
    """
    tables = self.build_tables()
    meta = tables["meta"]
    # fix the type to make json happier: decode the first name entry to text
    meta["pdf_name"] = np.array([meta["pdf_name"][0].decode()])
    return tables_io.convert(tables, tables_io.types.JSON_STRING)
[docs]
def pdf(self, x: ArrayLike) -> ArrayLike:
    """
    Evaluate the probability density function (PDF) of every distribution in the ensemble.

    Parameters
    ----------
    x : ArrayLike
        Location(s) at which to evaluate the PDF for each distribution.

    Returns
    -------
    pdf : ArrayLike
        The PDF value(s) at the given location(s).

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.pdf(np.linspace(3,6,6))
    array([[0.5 , 0.5 , 0.25 , 0.25 , 0. ,
    0. ],
    [0.37974684, 0.37974684, 0.18987342, 0.18987342, 0. ,
    0. ]])
    """
    values = self._frozen.pdf(x)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, x) if self.npdf == 1 else values
[docs]
def logpdf(self, x: ArrayLike) -> ArrayLike:
    """
    Evaluate the log of the probability density function (PDF) of every distribution in the ensemble.

    Parameters
    ----------
    x : ArrayLike
        Location(s) at which to do the evaluations

    Returns
    -------
    logpdf : ArrayLike
        The log of the PDF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.logpdf(np.linspace(3,6,6))
    array([[-0.69314718, -0.69314718, -1.38629436, -1.38629436, -inf,
    -inf],
    [-0.96825047, -0.96825047, -1.66139765, -1.66139765, -inf,
    -inf]])
    """
    values = self._frozen.logpdf(x)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, x) if self.npdf == 1 else values
[docs]
def cdf(self, x: ArrayLike) -> ArrayLike:
    """
    Evaluate the cumulative distribution function (CDF) of every distribution in the ensemble.

    Parameters
    ----------
    x : ArrayLike
        Location(s) at which to do the evaluations

    Returns
    -------
    cdf : ArrayLike
        The CDF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.cdf(np.linspace(3,6,6))
    array([[0.25 , 0.55 , 0.8 , 0.95 , 1. ,
    1. ],
    [0.43037975, 0.65822785, 0.84810127, 0.96202532, 1. ,
    1. ]])
    """
    values = self._frozen.cdf(x)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, x) if self.npdf == 1 else values
[docs]
def logcdf(self, x: ArrayLike) -> ArrayLike:
    """
    Evaluate the log of the cumulative distribution function (CDF) of every distribution in the ensemble.

    Parameters
    ----------
    x : ArrayLike
        Location(s) at which to do the evaluations

    Returns
    -------
    cdf : ArrayLike
        The log of the CDF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.logcdf(np.linspace(3,6,6))
    array([[-1.38629436, -0.597837 , -0.22314355, -0.05129329, 0. ,
    0. ],
    [-0.84308733, -0.41820413, -0.16475523, -0.03871451, 0. ,
    0. ]])
    """
    values = self._frozen.logcdf(x)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, x) if self.npdf == 1 else values
[docs]
def ppf(self, q: ArrayLike) -> ArrayLike:
    """
    Evaluate the percentage point function (PPF) of every distribution in the ensemble.

    Parameters
    ----------
    q : ArrayLike
        Location(s) at which to do the evaluations

    Returns
    -------
    ppf : ArrayLike
        The PPF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.ppf(0.5)
    array([[3.5 ],
    [3.18333333]])
    """
    values = self._frozen.ppf(q)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, q) if self.npdf == 1 else values
[docs]
def sf(self, q: ArrayLike) -> ArrayLike:
    """
    Evaluate the survival fraction (SF) of every distribution in the ensemble.

    Parameters
    ----------
    q : ArrayLike
        Location(s) at which to evaluate the distributions

    Returns
    -------
    sf : ArrayLike
        The SF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.sf(0.5)
    array([[1. ],
    [0.96835443]])
    """
    values = self._frozen.sf(q)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, q) if self.npdf == 1 else values
[docs]
def logsf(self, q: ArrayLike) -> ArrayLike:
    """Evaluate the log of the survival function (SF) of every distribution in the ensemble.

    Parameters
    ----------
    q : ArrayLike
        Location(s) at which to evaluate the distributions

    Returns
    -------
    sf : ArrayLike
        The log of the SF at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.logsf(0.5)
    array([[ 0. ],
    [-0.03215711]])
    """
    values = self._frozen.logsf(q)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, q) if self.npdf == 1 else values
[docs]
def isf(self, q: ArrayLike) -> ArrayLike:
    """
    Evaluate the inverse of the survival fraction (SF) of every distribution in the ensemble.

    Parameters
    ----------
    q : ArrayLike
        Location(s) at which to evaluate the distributions

    Returns
    -------
    sf : ArrayLike
        The inverse of the survival fraction at the given location(s)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.isf(0.5)
    array([[3.5 ],
    [3.18333333]])
    """
    values = self._frozen.isf(q)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, q) if self.npdf == 1 else values
[docs]
def rvs(
    self,
    size: int = 1,
    random_state: Union[None, int, np.random.Generator] = None,
) -> ArrayLike:
    """
    Draw samples from every distribution in this ensemble.

    The frozen object is asked for samples of shape (npdf, size), where size
    is the number of samples per distribution.

    Parameters
    ----------
    size : int, optional
        Number of samples to return, by default 1.
    random_state : int, numpy.random.Generator, None, optional
        The random state to use. Can be provided with a random seed for consistency. By default None.

    Returns
    -------
    samples : ArrayLike
        The array of samples for each distribution in the ensemble, shape (npdf,size)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.rvs(size=2)
    array([[3.12956247, 3.72090937],
    [4.96783836, 3.24016123]])
    """
    frozen = self._frozen
    sample_shape = (frozen.npdf, size)
    return frozen.rvs(size=sample_shape, random_state=random_state)
[docs]
def stats(self, moments: str = "mv") -> tuple[ArrayLike, ...]:
    """
    Return some statistics for each of the distributions in this ensemble.

    Each letter of ``moments`` selects one moment to return. The options are:
    "m" = mean, "v" = variance, "s" = (Fisher's) skew, "k" = (Fisher's) kurtosis.
    The request is forwarded unchanged to the frozen object.

    Parameters
    ----------
    moments : str, optional
        Which moments to include, by default "mv"

    Returns
    -------
    stats : tuple[ArrayLike, ... ]
        A sequence of arrays of the moments requested, where the shape of the arrays is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.stats()
    (array([[3.375 ],
    [3.01898734]]),
    array([[0.859375 ],
    [1.23698125]]))
    """
    frozen = self._frozen
    requested = frozen.stats(moments=moments)
    return requested
[docs]
def mean(self) -> ArrayLike:
    """Return the mean of each distribution in this ensemble.

    Returns
    -------
    means : ArrayLike
        The mean for each distribution, returns a float if there is only one
        distribution, or the shape of the array is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.mean()
    array([[3.375 ],
    [3.01898734]])
    """
    values = self._frozen.mean()
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, 1) if self.npdf == 1 else values
[docs]
def var(self) -> ArrayLike:
    """Return the variance of each distribution in this ensemble.

    Returns
    -------
    variances : ArrayLike
        The variance for each distribution, returns a float if there is only one
        distribution, or the shape of the array is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.var()
    array([[0.859375 ],
    [1.23698125]])
    """
    values = self._frozen.var()
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, 1) if self.npdf == 1 else values
[docs]
def std(self) -> ArrayLike:
    """Return the standard deviation of each distribution in this ensemble.

    Returns
    -------
    stds : ArrayLike
        The standard deviations for each distribution, the shape of the array is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.std()
    array([[0.92702481],
    [1.11219659]])
    """
    values = self._frozen.std()
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, 1) if self.npdf == 1 else values
[docs]
def moment(self, n: int) -> ArrayLike:
    """Return the nth moment of each distribution in this ensemble.

    Parameters
    ----------
    n : int
        The order of the moment

    Returns
    -------
    moments : ArrayLike
        The nth moment for each distribution, the shape of the array is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.moment(2)
    array([[12.25 ],
    [10.35126582]])
    """
    values = self._frozen.moment(n)
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, 1) if self.npdf == 1 else values
[docs]
def entropy(self) -> ArrayLike:
    """Return the differential entropy of each distribution in this ensemble.

    Returns
    -------
    entropy : ArrayLike
        The entropy for each distribution, the shape of the array is (npdf, 1)

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.entropy()
    array([[1.21300757],
    [1.45307405]])
    """
    values = self._frozen.entropy()
    # reduce dimensionality if possible (single-distribution ensembles only)
    return reduce_dimensions(values, 1) if self.npdf == 1 else values
# def pmf(self, k):
# """ Return the kth pmf for this ensemble """
# return self._frozen.pmf(k)
# def logpmf(self, k):
# """ Return the log of the kth pmf for this ensemble """
# return self._frozen.logpmf(k)
[docs]
def interval(self, alpha: ArrayLike) -> tuple[ArrayLike, ...]:
    """
    Return the intervals corresponding to a confidence level of `alpha` for each of the
    distributions in this ensemble.

    The request is forwarded unchanged to the frozen object.

    Parameters
    ----------
    alpha : ArrayLike
        The array of values to return intervals for. These should be the probability that a random variable will be
        drawn from the returned range. Each value should be in the range [0,1].

    Returns
    -------
    interval : tuple[ArrayLike, ...]
        A tuple of the arrays containing the intervals for each distribution, where the
        shape of the arrays is (npdf, len(alpha))

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.interval(alpha=[0,0.5,0.9])
    (array([[1.4 , 3. , 3.5 ],
    [0.79 , 2.2875 , 3.18333333]]),
    array([[3.5 , 4. , 4.8 ],
    [3.18333333, 3.84166667, 4.73666667]]))
    """
    frozen = self._frozen
    bounds = frozen.interval(alpha)
    return bounds
[docs]
def histogramize(self, bins: ArrayLike) -> tuple[ArrayLike, ArrayLike]:
    """Compute integrated histogram bin values for all distributions in the ensemble.

    The work is delegated to the ``histogramize`` method of the frozen
    distribution object held by this ensemble, and its result is returned
    unchanged.

    Parameters
    ----------
    bins : ArrayLike
        Array of N+1 endpoints of N bins

    Returns
    -------
    histogram : tuple[ArrayLike, ArrayLike]
        The first array in the tuple is the bin edges that were input. The second
        array in the tuple is an (npdf, N) array of the values in the bins.

    Examples
    --------
    >>> import qp
    >>> import numpy as np
    >>> ens_h = qp.hist.create_ensemble(bins= np.array([0,1,2,3,4,5]),
    ... pdfs = np.array([[0,0.1,0.1,0.4,0.2],[0.05,0.09,0.2,0.3,0.15]]))
    >>> ens_h.histogramize(bins=np.array([1,2,3,4,5]))
    (array([1, 2, 3, 4, 5]),
     array([[0.125     , 0.125     , 0.5       , 0.25      ],
            [0.11392405, 0.25316456, 0.37974684, 0.18987342]]))
    """
    # The return annotation names both tuple members (edges, values) so it
    # agrees with the documented two-element result.
    return self._frozen.histogramize(bins)
[docs]
def integrate(
    self, limits: tuple[Union[float, ArrayLike], Union[float, ArrayLike]]
) -> ArrayLike:
    """Integrate the PDFs of the distributions in the ensemble between two limits.

    The integral is obtained as the difference of the ensemble CDF evaluated
    at the upper limit and at the lower limit.

    Parameters
    ----------
    limits : tuple[Union[float, ArrayLike], Union[float, ArrayLike]]
        A tuple with the limits of integration, where the first object in the
        tuple is the lower limit, and the second object is the upper limit.
        The limit objects can be floats or arrays, where the number of limits
        is the length of those arrays, or `nlimits`.

    Returns
    -------
    integral : ArrayLike
        Value of the integral(s), with the shape (npdf, nlimits)
    """
    cdf_at_upper = self.cdf(limits[1])
    cdf_at_lower = self.cdf(limits[0])
    return cdf_at_upper - cdf_at_lower
[docs]
def mix_mod_fit(self, comps=5):  # pragma: no cover
    """Placeholder for fitting a mixture model to the distributions.

    This method is not implemented: calling it always raises
    ``NotImplementedError``.

    Parameters
    ----------
    comps : int, optional
        Number of components to consider, by default 5. It is only used to
        build the error message.

    Raises
    ------
    NotImplementedError
        Always, with a message that includes `comps`.

    Notes
    -----
    The original documentation describes the intended result as a list of
    `qp.Composite` objects approximating the PDFs, supporting only a mixture
    of Gaussians.
    """
    message = "mix_mod_fit %i" % comps
    raise NotImplementedError(message)
[docs]
def moment_partial(self, n: int, limits: tuple, dx: float = 0.01) -> ArrayLike:
    """Return the nth moment over a particular range for each distribution.

    The PDFs are evaluated on an evenly spaced grid spanning `limits`, and
    the grid raised to the power `n` is combined with them by
    ``quick_moment``.

    Parameters
    ----------
    n : int
        The order of the moment to return
    limits : tuple
        The range over which to calculate the moment, where the first number
        is the lower limit and the second number is the upper limit.
    dx : float, optional
        The distance between grid points when calculating, by default 0.01

    Returns
    -------
    ArrayLike
        Array of the moments for each of the distributions, with shape (npdf,)
    """
    num_points = int((limits[-1] - limits[0]) / dx)
    x_values = np.linspace(limits[0], limits[1], num_points)
    # NOTE: np.linspace places ``num_points`` samples with both endpoints
    # included, so the real spacing is (upper - lower) / (num_points - 1),
    # not exactly ``dx``. The user-supplied ``dx`` is what is passed on below.
    pdf_values = self.gridded(x_values)[1]
    x_to_the_n = x_values**n
    return quick_moment(pdf_values, x_to_the_n, dx)
[docs]
def plot(
    self,
    key: Union[int, slice] = 0,
    **kwargs: str,
):
    """Plot the selected distribution as a curve.

    The ensemble is indexed with `key` and the result is handed to the
    ``plot`` method of the ensemble's parameterization class.

    Parameters
    ----------
    key : int or slice, optional
        The index or slice of the distribution or distributions from this
        ensemble to plot, by default 0.

    Other Parameters
    ----------------
    axes : Axes
        The axes to plot on. Either this or xlim must be provided.
    xlim : (float, float)
        The x-axis limits. Either this or axes must be provided.
    kwargs :
        Any keyword arguments to pass to matplotlib's axes.plot() method.

    Returns
    -------
    axes : Axes
        The plot axes
    """
    plotter = self._gen_class.plot
    return plotter(self[key], **kwargs)
[docs]
def plot_native(self, key: Union[int, slice] = 0, **kwargs: str):
    """Plot the selected distribution in the default format for this parameterization.

    To find what arguments are required for specific parameterizations, you
    can check the docstrings of ``qp.[parameterization].plot_native``, where
    ``[parameterization]`` is the parameterization class for the current
    ensemble.

    Parameters
    ----------
    key : int or slice, optional
        The index or slice of the distribution or distributions from this
        ensemble to plot, by default 0.
    kwargs :
        The keyword arguments to pass to the parameterization's plot_native
        method.

    Returns
    -------
    axes : Axes
        The plot axes
    """
    native_plotter = self._gen_class.plot_native
    return native_plotter(self[key], **kwargs)
def _get_allocation_kwds(self, npdf: int) -> Mapping:
    """Describe the arrays to allocate for an output holding `npdf` distributions.

    Every table returned by ``self.build_tables()`` except ``"meta"`` is
    mapped to a dictionary of ``(shape, dtype)`` pairs, one per array, where
    the first entry of each shape is replaced by `npdf`.

    Parameters
    ----------
    npdf : int
        The size to use for the first dimension of every array.

    Returns
    -------
    Mapping
        ``{group: {key: (shape, dtype)}}``, where ``shape`` is a list.
    """
    allocation = {}
    for group_name, table in self.build_tables().items():
        if group_name == "meta":
            # The metadata table is not part of the per-distribution allocation.
            continue
        specs = {}
        allocation[group_name] = specs
        for column, values in table.items():
            dims = list(values.shape)
            dims[0] = npdf
            specs[column] = (dims, values.dtype)
    return allocation
[docs]
def initializeHdf5Write(
    self, filename: str, npdf: int, comm=None
) -> tuple[dict[str, h5py.File | h5py.Group], h5py.File]:
    """Set up the output file for writing an ensemble in chunks.

    The arrays are sized for `npdf` distributions rather than for the size of
    this ensemble, as the "initial chunk" will not contain the full data.

    Parameters
    ----------
    filename : str
        Name of the file to create
    npdf : int
        Total number of distributions that the file will contain,
        usually larger than the size of the current ensemble
    comm : MPI communicator
        Optional MPI communicator to allow parallel writing

    Returns
    -------
    group : dict[str, h5py.File | h5py.Group]
        A dictionary of the groups to write to.
    fout : h5py.File
        The output file object that has been created.
    """
    allocation = self._get_allocation_kwds(npdf)
    groups, outfile = hdf5.initialize_HDF5_write(filename, comm=comm, **allocation)
    return groups, outfile
[docs]
def writeHdf5Chunk(
    self, fname: "h5py.File" | "h5py.Group", start: int, end: int
) -> None:
    """Write a chunk of the ensemble data to file.

    This will write the data for the distributions in the slice from
    [start:end] to the file. This includes the ancillary data table. The
    ``"meta"`` table is removed from the tables before writing.

    Parameters
    ----------
    fname : h5py.File | h5py.Group
        The file or group object to write to
    start : int
        Starting index of data to write in the h5py file
    end : int
        Ending index of data to write in the h5py file
    """
    tables = self.build_tables(encode=True, ext="hdf5")
    # Work on a copy so that dropping "meta" does not touch the built tables.
    chunk_tables = tables.copy()
    chunk_tables.pop("meta")
    hdf5.write_dict_to_HDF5_chunk(fname, chunk_tables, start, end)
[docs]
def finalizeHdf5Write(self, filename: "h5py.File" | "h5py.Group") -> None:
    """Write ensemble metadata to the output file and close the file.

    The ensemble metadata is passed through ``make_len_equal`` and then
    written under the ``"meta"`` group name.

    Parameters
    ----------
    filename : h5py.File | h5py.Group
        The file or group object to complete writing and close.
    """
    meta_columns = make_len_equal(self.metadata)
    hdf5.finalize_HDF5_write(filename, "meta", **meta_columns)