# Source code for qp.metrics.array_metrics
"""This module implements metric calculations that are independent of qp.Ensembles"""
import numpy as np
from scipy import stats
from scipy.integrate import quad
from scipy.optimize import minimize_scalar
from qp.utils import safelog
[docs]def quick_anderson_ksamp(p_random_variables, q_random_variables, **kwargs):
"""Calculate the k-sample Anderson-Darling statistic using scipy.stats.anderson_ksamp for two CDFs.
For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
Parameters
----------
p_random_variables : np.array
An array of random variables from the given distribution
q_random_variables : np.array
An array of random variables from the given distribution
Returns
-------
[Result objects]
A array of objects with attributes ``statistic``, ``critical_values``, and ``significance_level``.
"""
return stats.anderson_ksamp([p_random_variables, q_random_variables], **kwargs)
def quick_kld(p_eval, q_eval, dx=0.01):
    """Compute the Kullback-Leibler Divergence between two evaluations of PDFs.

    Parameters
    ----------
    p_eval: numpy.ndarray, float
        evaluations of probability distribution closer to the truth
    q_eval: numpy.ndarray, float
        evaluations of probability distribution that approximates p
    dx: float
        resolution of integration grid

    Returns
    -------
    Dpq: float
        the value of the Kullback-Leibler Divergence from `q` to `p`
    """
    # safelog would be easy to isolate if array_metrics is ever extracted.
    # log(p/q) computed as log(p) - log(q), with safelog guarding small values.
    log_ratio = safelog(p_eval) - safelog(q_eval)
    # Riemann-sum approximation of the KLD integral from q to p
    return dx * np.sum(p_eval * log_ratio, axis=-1)
def quick_moment(p_eval, grid_to_N, dx):
    """Compute a moment of an evaluated PDF.

    Parameters
    ----------
    p_eval: numpy.ndarray, float
        the values of a probability distribution
    grid_to_N: numpy.ndarray, float
        the evaluation grid raised to the power N (order of the moment)
    dx: float
        the difference between regular grid points

    Returns
    -------
    M: float
        value of the moment
    """
    # Riemann sum: integral of p(x) * x^N approximated by dot(p, grid^N) * dx
    return dx * np.dot(p_eval, grid_to_N)
def quick_rmse(p_eval, q_eval, N):
    """Compute the Root Mean Square Error between two evaluations of PDFs.

    Parameters
    ----------
    p_eval: numpy.ndarray, float
        evaluation of probability distribution function whose distance between
        its truth and the approximation of `q` will be calculated.
    q_eval: numpy.ndarray, float
        evaluation of probability distribution function whose distance between
        its approximation and the truth of `p` will be calculated.
    N: int
        number of points at which PDFs were evaluated

    Returns
    -------
    rms: float
        the value of the RMS error between `q` and `p`
    """
    residuals = p_eval - q_eval
    # mean squared difference over the evaluation grid, then square root
    return np.sqrt(np.sum(residuals * residuals, axis=-1) / N)
def quick_rbpe(pdf_function, integration_bounds, limits=(np.inf, np.inf)):
    """Compute the risk based point estimate of a qp.Ensemble object with npdf == 1.

    Parameters
    ----------
    pdf_function, python function
        The function should calculate the value of a pdf at a given x value
    integration_bounds, 2-tuple of floats
        The integration bounds - typically (ppf(0.01), ppf(0.99)) for the given distribution
    limits, tuple of floats
        The limits at which to evaluate possible z_best estimates.
        If custom limits are not provided then all potential z value will be
        considered using the scipy.optimize.minimize_scalar function.

    Returns
    -------
    rbpe: float
        The risk based point estimate of the provided ensemble.
    """
    lower, upper = integration_bounds[0], integration_bounds[1]

    def _loss(x):
        # Bounded loss in [0, 1) with characteristic scale 0.15
        return 1.0 - (1.0 / (1.0 + (x / 0.15) ** 2))

    def _risk(zp):
        # Expected loss of point estimate zp under the pdf, integrated
        # over the supplied bounds; quad returns (value, abs_error).
        integrand = lambda z: pdf_function(z) * _loss((zp - z) / (1.0 + z))
        return quad(integrand, lower, upper)[0]

    # np.inf in limits[0] is the sentinel for "no custom limits":
    # fall back to an unconstrained scalar minimization.
    if limits[0] == np.inf:
        return minimize_scalar(_risk).x
    return minimize_scalar(
        _risk, bounds=(limits[0], limits[1]), method="bounded"
    ).x