Mixmod conversion example
This notebook shows how to convert to a mixmod ensemble. To drive the point home, we’ll start with an initial mixmod ensemble based on 3 normal distributions. However to make it easier to see what’s going on, we’ll focus on the first normal distribution centered at 0.5
import numpy as np
import qp
import matplotlib.pyplot as plt
mu = np.array([[0.5,1.1, 2.9], [0.5, 1.25, 2.8], [0.3, 1.9, 2.2]])
sig = np.array([[0.05,0.01,0.04], [0.05,0.01,0.02], [0.025, 0.01, 0.025]])
wt = np.array([[1,1,1], [1,1,1], [1,1,1]])
ens = qp.Ensemble(qp.mixmod, data=dict(means=mu, stds=sig, weights=wt))
ens[1].plot_native(xlim=(0,1))
<Axes: xlabel='redshift', ylabel='p(z)'>
In the following example we use the default parameters to convert the input mixmod ensemble into another mixmod ensemble. We do the conversion 10 times and plot each on top of the original distribution.
Note that the peaks of the converted distributions are not consistent.
xvals = np.linspace(0, 1., 301)
axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
for i in range(10):
ens_m = qp.convert(ens[0], 'mixmod', xvals=xvals, ncomps=3)
_ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
leg_m = axes_m.legend()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 6
3 axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
5 for i in range(10):
----> 6 ens_m = qp.convert(ens[0], 'mixmod', xvals=xvals, ncomps=3)
7 _ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
9 leg_m = axes_m.legend()
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/factory.py:622, in Factory.convert(self, in_dist, class_name, **kwds)
617 if extract_func is None: # pragma: no cover
618 raise KeyError(
619 "Class named %s does not have a extraction_method named %s"
620 % (class_name, method)
621 )
--> 622 data = extract_func(in_dist, **kwds_copy)
623 return self.create(class_name, data, method)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod_utils.py:35, in extract_mixmod_fit_samples(in_dist, **kwargs)
33 n_sample = kwargs.pop("nsamples", 1000)
34 random_state = kwargs.pop("random_state", None)
---> 35 samples = in_dist.rvs(size=n_sample, random_state=random_state)
37 def mixmod_helper(samps):
38 estimator = mixture.GaussianMixture(n_components=n_comps)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/ensemble.py:1003, in Ensemble.rvs(self, size, random_state)
967 def rvs(
968 self,
969 size: int = 1,
970 random_state: Union[None, int, np.random.Generator] = None,
971 ) -> ArrayLike:
972 """
973 Generate samples from the distributions in this ensemble.
974
(...)
1001
1002 """
-> 1003 return self._frozen.rvs(
1004 size=(self._frozen.npdf, size), random_state=random_state
1005 )
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:532, in rv_frozen.rvs(self, size, random_state)
530 kwds = self.kwds.copy()
531 kwds.update({'size': size, 'random_state': random_state})
--> 532 return self.dist.rvs(*self.args, **kwds)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:1111, in rv_generic.rvs(self, *args, **kwds)
1108 else:
1109 random_state = self._random_state
-> 1111 vals = self._rvs(*args, size=size, random_state=random_state)
1113 vals = vals * scale + loc
1115 # do not forget to restore the _random_state
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/base.py:323, in Pdf_rows_gen._rvs(self, size, random_state, *args)
320 def _rvs(self, *args, size=None, random_state=None):
321 # Use basic inverse cdf algorithm for RV generation as default.
322 U = random_state.uniform(size=size)
--> 323 Y = self._ppf(U, *args)
324 if size is None: # pragma: no cover
325 return Y
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod.py:259, in mixmod_gen._ppf(self, x, row)
257 cdf_vals = self.cdf(grid, np.expand_dims(rr, -1))
258 else: # pragma: no cover
--> 259 raise ValueError(
260 f"Oops, we can't handle this kind of input to mixmod._ppf {case_idx}"
261 )
262 return interpolate_multi_x_y(
263 x, row, cdf_vals, grid, bounds_error=False, fill_value=(min_val, max_val)
264 ).ravel()
ValueError: Oops, we can't handle this kind of input to mixmod._ppf 2
Reproducing the above plots, but this time specifying the number of random variates to be drawn from the input distribution to be 100_000 instead of the default 1000. Note that the peaks are now more consistent across the 10 iterations, but still show some jitter.
xvals = np.linspace(0, 1., 301)
axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
for i in range(10):
ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, nsamples=100_000)
_ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
leg_m = axes_m.legend()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[3], line 6
3 axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
5 for i in range(10):
----> 6 ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, nsamples=100_000)
7 _ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
9 leg_m = axes_m.legend()
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/factory.py:622, in Factory.convert(self, in_dist, class_name, **kwds)
617 if extract_func is None: # pragma: no cover
618 raise KeyError(
619 "Class named %s does not have a extraction_method named %s"
620 % (class_name, method)
621 )
--> 622 data = extract_func(in_dist, **kwds_copy)
623 return self.create(class_name, data, method)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod_utils.py:35, in extract_mixmod_fit_samples(in_dist, **kwargs)
33 n_sample = kwargs.pop("nsamples", 1000)
34 random_state = kwargs.pop("random_state", None)
---> 35 samples = in_dist.rvs(size=n_sample, random_state=random_state)
37 def mixmod_helper(samps):
38 estimator = mixture.GaussianMixture(n_components=n_comps)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/ensemble.py:1003, in Ensemble.rvs(self, size, random_state)
967 def rvs(
968 self,
969 size: int = 1,
970 random_state: Union[None, int, np.random.Generator] = None,
971 ) -> ArrayLike:
972 """
973 Generate samples from the distributions in this ensemble.
974
(...)
1001
1002 """
-> 1003 return self._frozen.rvs(
1004 size=(self._frozen.npdf, size), random_state=random_state
1005 )
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:532, in rv_frozen.rvs(self, size, random_state)
530 kwds = self.kwds.copy()
531 kwds.update({'size': size, 'random_state': random_state})
--> 532 return self.dist.rvs(*self.args, **kwds)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:1111, in rv_generic.rvs(self, *args, **kwds)
1108 else:
1109 random_state = self._random_state
-> 1111 vals = self._rvs(*args, size=size, random_state=random_state)
1113 vals = vals * scale + loc
1115 # do not forget to restore the _random_state
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/base.py:323, in Pdf_rows_gen._rvs(self, size, random_state, *args)
320 def _rvs(self, *args, size=None, random_state=None):
321 # Use basic inverse cdf algorithm for RV generation as default.
322 U = random_state.uniform(size=size)
--> 323 Y = self._ppf(U, *args)
324 if size is None: # pragma: no cover
325 return Y
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod.py:259, in mixmod_gen._ppf(self, x, row)
257 cdf_vals = self.cdf(grid, np.expand_dims(rr, -1))
258 else: # pragma: no cover
--> 259 raise ValueError(
260 f"Oops, we can't handle this kind of input to mixmod._ppf {case_idx}"
261 )
262 return interpolate_multi_x_y(
263 x, row, cdf_vals, grid, bounds_error=False, fill_value=(min_val, max_val)
264 ).ravel()
ValueError: Oops, we can't handle this kind of input to mixmod._ppf 2
Here we use the default number of samples (1000), but specify the random_state value for drawing the random variates. This results in all iterations being identical, and reproducible.
xvals = np.linspace(0, 1., 301)
axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
for i in range(10):
ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, random_state=42)
_ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
leg_m = axes_m.legend()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 6
3 axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
5 for i in range(10):
----> 6 ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, random_state=42)
7 _ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
9 leg_m = axes_m.legend()
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/factory.py:622, in Factory.convert(self, in_dist, class_name, **kwds)
617 if extract_func is None: # pragma: no cover
618 raise KeyError(
619 "Class named %s does not have a extraction_method named %s"
620 % (class_name, method)
621 )
--> 622 data = extract_func(in_dist, **kwds_copy)
623 return self.create(class_name, data, method)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod_utils.py:35, in extract_mixmod_fit_samples(in_dist, **kwargs)
33 n_sample = kwargs.pop("nsamples", 1000)
34 random_state = kwargs.pop("random_state", None)
---> 35 samples = in_dist.rvs(size=n_sample, random_state=random_state)
37 def mixmod_helper(samps):
38 estimator = mixture.GaussianMixture(n_components=n_comps)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/ensemble.py:1003, in Ensemble.rvs(self, size, random_state)
967 def rvs(
968 self,
969 size: int = 1,
970 random_state: Union[None, int, np.random.Generator] = None,
971 ) -> ArrayLike:
972 """
973 Generate samples from the distributions in this ensemble.
974
(...)
1001
1002 """
-> 1003 return self._frozen.rvs(
1004 size=(self._frozen.npdf, size), random_state=random_state
1005 )
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:532, in rv_frozen.rvs(self, size, random_state)
530 kwds = self.kwds.copy()
531 kwds.update({'size': size, 'random_state': random_state})
--> 532 return self.dist.rvs(*self.args, **kwds)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:1111, in rv_generic.rvs(self, *args, **kwds)
1108 else:
1109 random_state = self._random_state
-> 1111 vals = self._rvs(*args, size=size, random_state=random_state)
1113 vals = vals * scale + loc
1115 # do not forget to restore the _random_state
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/base.py:323, in Pdf_rows_gen._rvs(self, size, random_state, *args)
320 def _rvs(self, *args, size=None, random_state=None):
321 # Use basic inverse cdf algorithm for RV generation as default.
322 U = random_state.uniform(size=size)
--> 323 Y = self._ppf(U, *args)
324 if size is None: # pragma: no cover
325 return Y
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod.py:259, in mixmod_gen._ppf(self, x, row)
257 cdf_vals = self.cdf(grid, np.expand_dims(rr, -1))
258 else: # pragma: no cover
--> 259 raise ValueError(
260 f"Oops, we can't handle this kind of input to mixmod._ppf {case_idx}"
261 )
262 return interpolate_multi_x_y(
263 x, row, cdf_vals, grid, bounds_error=False, fill_value=(min_val, max_val)
264 ).ravel()
ValueError: Oops, we can't handle this kind of input to mixmod._ppf 2
Finally, holding random_state constant, and increasing the number of samples results in the converted distributions consistently having the same values and showing a better reproduction of the input distribution.
xvals = np.linspace(0, 1., 301)
axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
for i in range(10):
ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, nsamples=100_000, random_state=42)
_ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
leg_m = axes_m.legend()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[5], line 6
3 axes_m = ens.plot_native(xlim=(0,1.), label='Original', marker='.')
5 for i in range(10):
----> 6 ens_m = qp.convert(ens, 'mixmod', xvals=xvals, ncomps=3, nsamples=100_000, random_state=42)
7 _ = ens_m.plot(axes=axes_m, label=f'iteration_{i}', color=np.random.rand(3,))
9 leg_m = axes_m.legend()
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/factory.py:622, in Factory.convert(self, in_dist, class_name, **kwds)
617 if extract_func is None: # pragma: no cover
618 raise KeyError(
619 "Class named %s does not have a extraction_method named %s"
620 % (class_name, method)
621 )
--> 622 data = extract_func(in_dist, **kwds_copy)
623 return self.create(class_name, data, method)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod_utils.py:35, in extract_mixmod_fit_samples(in_dist, **kwargs)
33 n_sample = kwargs.pop("nsamples", 1000)
34 random_state = kwargs.pop("random_state", None)
---> 35 samples = in_dist.rvs(size=n_sample, random_state=random_state)
37 def mixmod_helper(samps):
38 estimator = mixture.GaussianMixture(n_components=n_comps)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/core/ensemble.py:1003, in Ensemble.rvs(self, size, random_state)
967 def rvs(
968 self,
969 size: int = 1,
970 random_state: Union[None, int, np.random.Generator] = None,
971 ) -> ArrayLike:
972 """
973 Generate samples from the distributions in this ensemble.
974
(...)
1001
1002 """
-> 1003 return self._frozen.rvs(
1004 size=(self._frozen.npdf, size), random_state=random_state
1005 )
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:532, in rv_frozen.rvs(self, size, random_state)
530 kwds = self.kwds.copy()
531 kwds.update({'size': size, 'random_state': random_state})
--> 532 return self.dist.rvs(*self.args, **kwds)
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:1111, in rv_generic.rvs(self, *args, **kwds)
1108 else:
1109 random_state = self._random_state
-> 1111 vals = self._rvs(*args, size=size, random_state=random_state)
1113 vals = vals * scale + loc
1115 # do not forget to restore the _random_state
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/base.py:323, in Pdf_rows_gen._rvs(self, size, random_state, *args)
320 def _rvs(self, *args, size=None, random_state=None):
321 # Use basic inverse cdf algorithm for RV generation as default.
322 U = random_state.uniform(size=size)
--> 323 Y = self._ppf(U, *args)
324 if size is None: # pragma: no cover
325 return Y
File ~/checkouts/readthedocs.org/user_builds/qp/envs/stable/lib/python3.10/site-packages/qp/parameterizations/mixmod/mixmod.py:259, in mixmod_gen._ppf(self, x, row)
257 cdf_vals = self.cdf(grid, np.expand_dims(rr, -1))
258 else: # pragma: no cover
--> 259 raise ValueError(
260 f"Oops, we can't handle this kind of input to mixmod._ppf {case_idx}"
261 )
262 return interpolate_multi_x_y(
263 x, row, cdf_vals, grid, bounds_error=False, fill_value=(min_val, max_val)
264 ).ravel()
ValueError: Oops, we can't handle this kind of input to mixmod._ppf 2