Source code for preliz.unidimensional.quartile

import warnings

import numpy as np

from preliz.distributions.normal import Normal
from preliz.internal.distribution_helper import valid_distribution
from preliz.internal.optimization import get_fixed_params, optimize_quartile, relative_error
from preliz.internal.rcparams import rcParams
from preliz.ppls.pymc_io import if_pymc_get_preliz


[docs] def quartile( distribution=None, q1=-1, q2=0, q3=1, fixed_params=None, plot=None, plot_kwargs=None, ax=None, ): """ Find the distribution with the specified quartiles. Parameters ---------- distribution : PreliZ or PyMC distribution PreliZ distribution are updated inplace, while PyMC distributions are converted to PreliZ distributions. Distributions can be partially initialized, i.e. some parameters can be fixed while others are left free to be estimated. For PreliZ distributions, set the parameters you want to fix and don't set the rest. As an alternative `fixed_params` can be used. For PyMC distributions, set the parameters to `np.nan`, and use `fixed_params` in case you want to fix any of them. q1 : float First quartile, i.e 0.25 of the mass is below this point. q2 : float Second quartile, i.e 0.50 of the mass is below this point. This is also know as the median. q3 : float Third quartile, i.e 0.75 of the mass is below this point. fixed_params: dict Dictionary with parameter names as keys and the values to fix them to as values. If using a PreliZ distribution, parameters can also be fixed by setting them when initializing the distribution. Defaults to None. plot : bool Whether to plot the distribution, and lower and upper bounds. Defaults to None, which results in the value of rcParams["plots.show_plot"] being used. plot_kwargs : dict Dictionary passed to the method ``plot_pdf()`` of ``distribution``. ax : matplotlib axes Returns ------- dict: dict with the parameters of the distribution axes: matplotlib axes (only if `plot=True`) Notes ----- After calling this function the attribute `opt` of the distribution will be updated with the OptimizeResult object from the optimization step. See Also -------- maxent : Find the maximum entropy distribution with a given mass inside a user defined interval. Examples -------- Calculate the Gamma distribution with quartiles 3, 6 and 8: .. plot:: :context: close-figs :include-source: true >>> import preliz as pz >>> pz.style.use('preliz-doc') >>> pz.quartile(pz.Gamma(), 3, 6, 8) Calculate the HalfStudentT T distribution with quartiles 2, 9 and 12 and a value of nu=7: .. plot:: :context: close-figs :include-source: true >>> import preliz as pz >>> pz.style.use('preliz-doc') >>> pz.quartile(pz.HalfStudentT(nu=7), 2, 9, 12) """ distribution = if_pymc_get_preliz(distribution) valid_distribution(distribution) if fixed_params is not None: distribution._parametrization(**fixed_params) if plot is None: plot = rcParams["plots.show_plot"] if plot_kwargs is None: plot_kwargs = {} if not q1 < q2 < q3: raise ValueError("The order of the quartiles should be q1 < q2 < q3") quartiles = np.array([q1, q2, q3]) if distribution is None: distribution = Normal() if distribution.is_frozen: raise ValueError("All parameters are fixed, at least one should be free") distribution._check_endpoints(q1, q3) # Find which parameters has been fixed none_idx, fixed = get_fixed_params(distribution) # Heuristic to provide an initial guess for the optimization step # We obtain those guesses by first approximating the mean and standard deviation # from the quartiles and then use those values for moment matching distribution._fit_moments(mean=q2, sigma=(q3 - q1) / 1.35) opt = optimize_quartile(distribution, quartiles, none_idx, fixed) distribution.opt = opt r_error, _ = relative_error(distribution, q1, q3, 0.5) if r_error > 0.01: computed_masses = distribution.cdf(quartiles).astype(float) warnings.warn( f"\nThe expected masses are 0.25, 0.5, 0.75\n" f"The computed ones are: {computed_masses[0]:.2g}, " f"{computed_masses[1]:.2g}, {computed_masses[2]:.2g}", stacklevel=2, ) if plot: ax = distribution.plot_pdf(**plot_kwargs) if plot_kwargs.get("pointinterval"): cid = -4 else: cid = -1 ax.plot(quartiles, [0, 0, 0], "o", color=ax.get_lines()[cid].get_c(), alpha=0.5) return distribution, ax return distribution