Source code for preliz.unidimensional.maxent

import warnings

from preliz.distributions.normal import Normal
from preliz.internal.distribution_helper import valid_distribution
from preliz.internal.optimization import get_fixed_params, optimize_max_ent, relative_error
from preliz.internal.rcparams import rcParams
from preliz.ppls.pymc_io import if_pymc_get_preliz


[docs] def maxent( distribution=None, lower=-1, upper=1, mass=None, fixed_stat=None, fixed_params=None, plot=None, plot_kwargs=None, ax=None, ): """ Find the maximum entropy distribution that satisfies the constraints. Find the maximum entropy distribution with `mass` in the interval defined by the `lower` and `upper` end-points. Parameters ---------- name : PreliZ or PyMC distribution PreliZ distribution are updated inplace, while PyMC distributions are converted to PreliZ distributions. Distributions can be partially initialized, i.e. some parameters can be fixed while others are left free to be estimated. For PreliZ distributions, set the parameters you want to fix and don't set the rest. As an alternative `fixed_params` can be used. For PyMC distributions, set the parameters to `np.nan`, and use `fixed_params` in case you want to fix any of them. lower : float Lower end-point upper: float Upper end-point mass: float Probability mass between ``lower`` and ``upper`` bounds. Defaults to None, which results in the value of rcParams["stats.ci_prob"] being used. fixed_stat: tuple Summary statistic to fix. The first element should be a name and the second a numerical value. Valid names are: "mean", "mode", "median", "var", "std", "skewness", "kurtosis". Defaults to None. fixed_params: dict Dictionary with parameter names as keys and the values to fix them to as values. If using a PreliZ distribution, parameters can also be fixed by setting them when initializing the distribution. Defaults to None. plot : bool Whether to plot the distribution, and lower and upper bounds. Defaults to None, which results in the value of rcParams["plots.show_plot"] being used. plot_kwargs : dict Dictionary passed to the method ``plot_pdf()`` of ``distribution``. ax : matplotlib axes Returns ------- dict: PreliZ distribution axes: matplotlib axes (only if `plot=True`) Notes ----- After calling this function the attribute `opt` of the distribution will be updated with the OptimizeResult object from the optimization step. See Also -------- quartile : Find the distribution with the specified quartiles. Examples -------- Calculate the maxent Gamma distribution with 90 % of the mass between 1 and 8: .. plot:: :context: close-figs :include-source: true >>> import preliz as pz >>> pz.style.use('preliz-doc') >>> pz.maxent(pz.Gamma(), 1, 8, 0.9) Calculate the maxent HalfStudentT T distribution with 90 % of the mass between 0 and 12 and a value of nu=4: .. plot:: :context: close-figs :include-source: true >>> import preliz as pz >>> pz.style.use('preliz-doc') >>> pz.maxent(pz.HalfStudentT(nu=4), 0, 12, 0.9) """ distribution = if_pymc_get_preliz(distribution) valid_distribution(distribution) if fixed_params is not None: distribution._parametrization(**fixed_params) if mass is None: mass = rcParams["stats.ci_prob"] if plot is None: plot = rcParams["plots.show_plot"] if fixed_stat is None: fixed_stat = () else: valid_stats = ["mean", "mode", "median", "var", "std", "skewness", "kurtosis"] if fixed_stat[0] not in valid_stats: raise ValueError("fixed_stat should be one of the following: " + ", ".join(valid_stats)) if not 0 < mass <= 1: raise ValueError("mass should be larger than 0 and smaller or equal to 1") if upper <= lower: raise ValueError("upper should be larger than lower") if plot_kwargs is None: plot_kwargs = {} if distribution is None: distribution = Normal() if distribution.is_frozen: raise ValueError("All parameters are fixed, at least one should be free") distribution._check_endpoints(lower, upper) if distribution.kind == "discrete": if not end_points_ints(lower, upper): warnings.warn( f"\n{distribution.__class__.__name__} distribution is discrete, " "but the provided bounds are not integers" ) # Find which parameters has been fixed none_idx, fixed_params = get_fixed_params(distribution) # Heuristic to provide an initial guess for the optimization step # We obtain those guesses by first approximating the mean and standard deviation # from intervals and mass and then use those values for moment matching distribution._fit_moments(mean=(lower + upper) / 2, sigma=((upper - lower) / 4) / mass) if "mode" in fixed_stat: try: distribution.mode() except NotImplementedError as exc: raise ValueError( f"{distribution.__class__.__name__} does not have a mode method" ) from exc opt = optimize_max_ent(distribution, lower, upper, mass, none_idx, fixed_params, fixed_stat) distribution.opt = opt r_error, computed_mass = relative_error(distribution, lower, upper, mass) if r_error > 0.01: warnings.warn( f"\nThe requested mass is {mass:.3g},\nbut the computed one is {computed_mass:.3g}", stacklevel=2, ) if plot: ax = distribution.plot_pdf(**plot_kwargs) if plot_kwargs.get("pointinterval"): cid = -4 else: cid = -1 ax.plot([lower, upper], [0, 0], "o", color=ax.get_lines()[cid].get_c(), alpha=0.5) return distribution, ax return distribution
def end_points_ints(lower, upper): return is_integer_num(lower) and is_integer_num(upper) def is_integer_num(obj): if isinstance(obj, int): return True if isinstance(obj, float): return obj.is_integer() return False