Source code for preliz.distributions.hurdle

import numpy as np

from preliz.distributions.distributions import DistributionTransformer
from preliz.internal.distribution_helper import all_not_none, eps


[docs] class Hurdle(DistributionTransformer): r""" Hurdle distribution. This is not a distribution per se, but a modifier of univariate distributions. Given a base distribution with parameters $\theta$, cumulative distribution function (CDF), and probability density mass/function (PDF). The density of a Hurdle distribution is: .. math:: f(x \mid \psi, \mu) = \left\{ \begin{array}{l} (1 - \psi) \ \text{if } x = 0 \\ \psi \frac{\text{PDF}(x \mid \theta))} {1 - \text{CDF}(\epsilon \mid \theta)} \ \text{if } x \neq 0 \end{array} \right. where $\psi$ is the expected proportion of the base distribution (0 < $\psi$ < 1), and $\epsilon$ is the machine precision for continuous distributions and 0 for discrete ones. The following figure shows the difference between a Gamma distribution and a HurdleGamma, with the same parameters for the base distribution (Gamma). .. plot:: :context: close-figs from preliz import Gamma, Hurdle, style style.use('preliz-doc') Hurdle(Gamma(mu=2, sigma=1), 0.8).plot_pdf() Gamma(mu=2, sigma=1).plot_pdf() Parameters ---------- dist: PreliZ distribution Univariate PreliZ distribution which will be truncated. psi : float Expected proportion of the base distribution (0 < psi < 1) """ def __init__(self, dist, psi=None, **kwargs): self.dist = dist super().__init__() self._parametrization(psi, **kwargs) def _parametrization(self, psi=None, **kwargs): dist_params = [] if not kwargs: if hasattr(self.dist, "params"): kwargs = dict(zip(self.dist.param_names, self.dist.params)) else: kwargs = dict(zip(self.dist.param_names, [None] * len(self.dist.param_names))) for key, value in kwargs.items(): dist_params.append(value) setattr(self, key, value) self.psi = psi self.params = (*dist_params, self.psi) self.param_names = (*self.dist.param_names, "psi") if all_not_none(*dist_params): self.dist._parametrization(**kwargs) self.is_frozen = True self.support = self.dist.support self.params_support = (*self.dist.params_support, (0, 1))
[docs] def mean(self): x_values = self.xvals("full") pdf = self.pdf(x_values) if self.kind == "discrete": return np.sum(x_values * pdf) else: return np.trapezoid(x_values * pdf, x_values)
[docs] def mode(self): if self.kind == "discrete": from preliz.internal.optimization import find_discrete_mode return find_discrete_mode(self) else: from preliz.internal.optimization import find_mode return find_mode(self)
[docs] def median(self): return self.ppf(0.5)
[docs] def var(self): x_values = self.xvals("full") pdf = self.pdf(x_values) if self.kind == "discrete": return np.sum((x_values - self.mean()) ** 2 * pdf) else: return np.trapezoid((x_values - self.mean()) ** 2 * pdf, x_values)
[docs] def std(self): return self.var() ** 0.5
[docs] def rvs(self, size=None, random_state=None): random_state = np.random.default_rng(random_state) return self.ppf(random_state.uniform(size=size))
[docs] def pdf(self, x): if self.dist == "discrete": return np.where( x == 0, 1 - self.psi, self.psi * self.dist.pdf(x) / (1 - self.dist.cdf(0)) ) else: return np.where( x == 0, 1 - self.psi, self.psi * self.dist.pdf(x) / (1 - self.dist.cdf(eps)) )
[docs] def cdf(self, x): if self.dist == "discrete": return np.where( x <= 0, 1 - self.psi, 1 - self.psi * (1 - self.dist.cdf(x)) / (1 - self.dist.cdf(0)) ) else: return np.where( x <= 0, 1 - self.psi, 1 - self.psi * (1 - self.dist.cdf(x)) / (1 - self.dist.cdf(eps)), )
[docs] def ppf(self, q): if self.kind == "discrete": lower = 0 else: lower = eps q = np.asarray(q) psi = self.psi lcdf = self.dist.cdf(lower) vals = np.zeros_like(q) vals[q >= (1 - psi)] = self.dist.ppf( lcdf + (q[q >= (1 - psi)] - (1 - psi)) * (1 - lcdf) / psi ) return np.where((q < 0) | (q > 1), np.nan, vals)
[docs] def logpdf(self, x): if self.dist == "discrete": pdf_values = np.where( x == 0, np.log(1 - self.psi), np.log(self.psi) + self.dist.logpdf(x) - np.log(1 - self.dist.cdf(0)), ) else: pdf_values = np.where( x == 0, np.log(1 - self.psi), np.log(self.psi) + self.dist.logpdf(x) - np.log(1 - self.dist.cdf(eps)), ) return pdf_values
[docs] def entropy(self): x_values = self.xvals("restricted") logpdf = self.logpdf(x_values) if self.kind == "discrete": return -np.sum(np.exp(logpdf) * logpdf) else: return -np.trapezoid(np.exp(logpdf) * logpdf, x_values)
def _fit_moments(self, mean, sigma): self.dist._fit_moments(mean, sigma) self._parametrization(**dict(zip(self.dist.param_names, self.dist.params)))