Source code for preliz.unidimensional.combine

import numpy as np

from preliz.internal.distribution_helper import get_distributions
from preliz.unidimensional.mle import mle



[docs]
def combine(
    distributions,
    weights=None,
    dist_names=None,
    sample_size=10_000,
    rng=0,
    plot=1,
    plot_kwargs=None,
    ax=None,
):
    """
    Combine a set of distributions into a single one.

    Fit a weighted sample from ``distributions`` into the distributions listed in ``dist_names`.
    The fit is done using maximum likelihood estimation, and the best match is plotted.
    Notice that the result is NOT a Mixture distribution, but a single distribution
    that best fits the weighted sample.

    Parameters
    ----------
    distributions : List of PreliZ distributions
        These are the distributions that we want to combine. Typically, these have been
        elicited from different individuals or instances.
    weights : array-like, optional
        Weights for each distribution. Defaults to None, i.e. equal weights.
        The sum of the weights must be equal to 1, otherwise it will be normalized.
    dist_names: list
        List of distributions to fit the weighted sample.
        Defaults to ``["Normal", "Gamma", "LogNormal", "StudentT"]``.
    sample_size : int
        Number of total samples to generate for the fit.
    rng : int or numpy.random.Generator, optional
        Random number generator or seed. Defaults to ``0``.
    plot : int
        Number of distributions to plots. Defaults to ``1`` (i.e. plot the best match)
        If larger than the number of passed distributions it will plot all of them.
        Use ``0`` or ``False`` to disable plotting.
    plot_kwargs : dict
        Dictionary passed to the method ``plot_pdf()``.
    ax : matplotlib axes

    Returns
    -------
    fitted_distributions : list of PreliZ distributions.
        The distributions that best fit the weighted sample. Sorted from best to worst match.
    axes : matplotlib axes
    """
    if rng is None:
        rng = np.random.default_rng()
    else:
        rng = np.random.default_rng(rng)

    if weights is None:
        weights = np.full(len(distributions), 1 / len(distributions))
    else:
        weights = np.array(weights, dtype=float)

    if np.any(weights < 0):
        raise ValueError("The weights must be positive.")

    weights /= weights.sum()

    n_size = (sample_size * weights).astype(int)

    if dist_names is None:
        dist_names = ["Normal", "Gamma", "LogNormal", "StudentT"]

    sample = []
    for dist, n in zip(distributions, n_size):
        sample.append(dist.rvs(n, random_state=rng))

    distributions = get_distributions(dist_names)

    idx, ax = mle(distributions, np.concatenate(sample), plot=plot, plot_kwargs=plot_kwargs, ax=ax)

    return np.array(distributions)[idx], ax