import warnings
from preliz.distributions.normal import Normal
from preliz.internal.distribution_helper import valid_distribution
from preliz.internal.optimization import get_fixed_params, optimize_max_ent, relative_error
from preliz.internal.rcparams import rcParams
from preliz.ppls.pymc_io import if_pymc_get_preliz
[docs]
def maxent(
distribution=None,
lower=-1,
upper=1,
mass=None,
fixed_stat=None,
fixed_params=None,
plot=None,
plot_kwargs=None,
ax=None,
):
"""
Find the maximum entropy distribution that satisfies the constraints.
Find the maximum entropy distribution with `mass` in the interval
defined by the `lower` and `upper` end-points.
Parameters
----------
name : PreliZ or PyMC distribution
PreliZ distribution are updated inplace, while PyMC distributions are converted
to PreliZ distributions.
Distributions can be partially initialized, i.e. some parameters can be fixed
while others are left free to be estimated. For PreliZ distributions, set the parameters
you want to fix and don't set the rest. As an alternative `fixed_params` can be used.
For PyMC distributions, set the parameters to `np.nan`, and use `fixed_params` in case
you want to fix any of them.
lower : float
Lower end-point
upper: float
Upper end-point
mass: float
Probability mass between ``lower`` and ``upper`` bounds. Defaults to None,
which results in the value of rcParams["stats.ci_prob"] being used.
fixed_stat: tuple
Summary statistic to fix. The first element should be a name and the second a
numerical value. Valid names are: "mean", "mode", "median", "var", "std",
"skewness", "kurtosis". Defaults to None.
fixed_params: dict
Dictionary with parameter names as keys and the values to fix them to as values.
If using a PreliZ distribution, parameters can also be fixed by setting them
when initializing the distribution. Defaults to None.
plot : bool
Whether to plot the distribution, and lower and upper bounds. Defaults to None,
which results in the value of rcParams["plots.show_plot"] being used.
plot_kwargs : dict
Dictionary passed to the method ``plot_pdf()`` of ``distribution``.
ax : matplotlib axes
Returns
-------
dict: PreliZ distribution
axes: matplotlib axes (only if `plot=True`)
Notes
-----
After calling this function the attribute `opt` of the distribution will be updated with the
OptimizeResult object from the optimization step.
See Also
--------
quartile : Find the distribution with the specified quartiles.
Examples
--------
Calculate the maxent Gamma distribution with 90 % of the mass between 1 and 8:
.. plot::
:context: close-figs
:include-source: true
>>> import preliz as pz
>>> pz.style.use('preliz-doc')
>>> pz.maxent(pz.Gamma(), 1, 8, 0.9)
Calculate the maxent HalfStudentT T distribution with 90 % of the mass between 0 and 12
and a value of nu=4:
.. plot::
:context: close-figs
:include-source: true
>>> import preliz as pz
>>> pz.style.use('preliz-doc')
>>> pz.maxent(pz.HalfStudentT(nu=4), 0, 12, 0.9)
"""
distribution = if_pymc_get_preliz(distribution)
valid_distribution(distribution)
if fixed_params is not None:
distribution._parametrization(**fixed_params)
if mass is None:
mass = rcParams["stats.ci_prob"]
if plot is None:
plot = rcParams["plots.show_plot"]
if fixed_stat is None:
fixed_stat = ()
else:
valid_stats = ["mean", "mode", "median", "var", "std", "skewness", "kurtosis"]
if fixed_stat[0] not in valid_stats:
raise ValueError("fixed_stat should be one of the following: " + ", ".join(valid_stats))
if not 0 < mass <= 1:
raise ValueError("mass should be larger than 0 and smaller or equal to 1")
if upper <= lower:
raise ValueError("upper should be larger than lower")
if plot_kwargs is None:
plot_kwargs = {}
if distribution is None:
distribution = Normal()
if distribution.is_frozen:
raise ValueError("All parameters are fixed, at least one should be free")
distribution._check_endpoints(lower, upper)
if distribution.kind == "discrete":
if not end_points_ints(lower, upper):
warnings.warn(
f"\n{distribution.__class__.__name__} distribution is discrete, "
"but the provided bounds are not integers"
)
# Find which parameters has been fixed
none_idx, fixed_params = get_fixed_params(distribution)
# Heuristic to provide an initial guess for the optimization step
# We obtain those guesses by first approximating the mean and standard deviation
# from intervals and mass and then use those values for moment matching
distribution._fit_moments(mean=(lower + upper) / 2, sigma=((upper - lower) / 4) / mass)
if "mode" in fixed_stat:
try:
distribution.mode()
except NotImplementedError as exc:
raise ValueError(
f"{distribution.__class__.__name__} does not have a mode method"
) from exc
opt = optimize_max_ent(distribution, lower, upper, mass, none_idx, fixed_params, fixed_stat)
distribution.opt = opt
r_error, computed_mass = relative_error(distribution, lower, upper, mass)
if r_error > 0.01:
warnings.warn(
f"\nThe requested mass is {mass:.3g},\nbut the computed one is {computed_mass:.3g}",
stacklevel=2,
)
if plot:
ax = distribution.plot_pdf(**plot_kwargs)
if plot_kwargs.get("pointinterval"):
cid = -4
else:
cid = -1
ax.plot([lower, upper], [0, 0], "o", color=ax.get_lines()[cid].get_c(), alpha=0.5)
return distribution, ax
return distribution
def end_points_ints(lower, upper):
return is_integer_num(lower) and is_integer_num(upper)
def is_integer_num(obj):
if isinstance(obj, int):
return True
if isinstance(obj, float):
return obj.is_integer()
return False