<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/56656>56656</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
std::poisson_distribution and std::negative_binomial_distribution are terrible for small types like int8_t
</td>
</tr>
<tr>
<th>Labels</th>
<td>
libc++
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
ldionne
</td>
</tr>
</table>
<pre>
The following tests fail miserably for `std::negative_binomial_distribution` on `int8_t` and `uint8_t`. We have similar problems for `std::poisson_distribution`.
Basically, I think we need to rethink the algorithm we use for `std::poisson_distribution` (which `negative_binomial_distribution` also uses). That might require an ABI break; however, breaking the ABI of random distributions might be acceptable.
```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <random>
#include <vector>
// Returns x multiplied by itself; the product is converted back to T.
template <class T>
T sqr(T x) {
  return x * x;
}

// Summary statistics of a sample, as produced by sample_moments().
struct Moments {
  double mean;
  double var;       // divides by the sample size (not size - 1)
  double skew;
  double kurtosis;  // excess kurtosis: 3 has already been subtracted
};

// Computes the mean, variance, skewness and excess kurtosis of `u`.
//
// All accumulation is done in double, so narrow element types such as
// int8_t do not overflow while summing. The sample must not be empty:
// every statistic divides by u.size().
template <class T>
Moments sample_moments(const std::vector<T>& u) {
  const double mean = std::accumulate(u.begin(), u.end(),
                                      double(0)) / u.size();
  double var = 0;
  double skew = 0;
  double kurtosis = 0;
  for (const T value : u) {
    const double dbl = (value - mean);
    const double d2 = sqr(dbl);
    var += d2;
    skew += dbl * d2;
    kurtosis += d2 * d2;
  }
  var /= u.size();
  const double dev = std::sqrt(var);
  skew /= u.size() * dev * var;
  kurtosis /= u.size() * var * var;
  kurtosis -= 3;
  const Moments m = {mean, var, skew, kurtosis};
  return m;
}

// Draws 1,000,000 values from std::negative_binomial_distribution<T>(k, p)
// using a default-constructed std::mt19937 and asserts that:
//   * every value lies within [d.min(), d.max()];
//   * the sample mean and variance are within 1% (relative error) of the
//     values computed from k and p below;
//   * the sample skewness and excess kurtosis are within `skew_tol` and
//     `kurtosis_tol` (relative error) of the values computed below.
template <class T>
void check_negative_binomial(T k, double p, double skew_tol,
                             double kurtosis_tol) {
  typedef std::negative_binomial_distribution<T> D;
  typedef std::mt19937 G;
  G g;
  D d(k, p);
  const int N = 1000000;
  std::vector<typename D::result_type> u;
  u.reserve(N);  // the sample size is known up front; avoid regrowth
  for (int i = 0; i < N; ++i) {
    typename D::result_type v = d(g);
    assert(d.min() <= v && v <= d.max());
    u.push_back(v);
  }
  const Moments m = sample_moments(u);
  // Expected moments, derived from the distribution's own parameters.
  const double x_mean = d.k() * (1 - d.p()) / d.p();
  const double x_var = x_mean / d.p();
  const double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
  const double x_kurtosis =
      6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));
  assert(std::abs((m.mean - x_mean) / x_mean) < 0.01);
  assert(std::abs((m.var - x_var) / x_var) < 0.01);
  assert(std::abs((m.skew - x_skew) / x_skew) < skew_tol);
  assert(std::abs((m.kurtosis - x_kurtosis) / x_kurtosis) < kurtosis_tol);
}

// k = 30, p = 0.03125. The expected mean is 30 * 0.96875 / 0.03125 = 930,
// which is larger than any value an 8-bit result type can hold.
template <class T>
void test2() {
  check_negative_binomial<T>(30, .03125, 0.01, 0.01);
}

// k = 40, p = 0.25 (expected mean 120); kurtosis tolerance relaxed to 3%.
template <class T>
void test3() {
  check_negative_binomial<T>(40, .25, 0.01, 0.03);
}

// k = 127, p = 0.5 (expected mean 127); skewness tolerance 4%, kurtosis 5%.
template <class T>
void test5() {
  check_negative_binomial<T>(127, 0.5, 0.04, 0.05);
}

// k = 1, p = 0.05 (expected mean 19); kurtosis tolerance relaxed to 3%.
template <class T>
void test6() {
  check_negative_binomial<T>(1, 0.05, 0.01, 0.03);
}
template <class T>
void tests() {
test2<T>();
test3<T>();
test5<T>();
test6<T>();
}
// Entry point: runs the full scenario list first with int8_t and then with
// uint8_t as the distribution's result type. The command-line arguments are
// ignored.
int main(int, char**) {
  tests<int8_t>();
  tests<uint8_t>();
  return 0;
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJztWkuP4jgQ_jXhYhEFBwIcOHTD9mguc2ppj8hJDPHiPMYP6N5fv2U7CQRCP2a0Uo8UFAG2q776XFXOR6sTl-nr6jmjaFdyXp5YsUeKSiXRjjCOciapIDF_hWWBvCiQKvXCB7gKuieKHek2ZkWZM8K3KZNKsFgrVhZgicrCOLBCLbbKjEmRmgndzvjob4oycqRIspxxIlAlypjTXN5Eq0omZVlcx_C9YOMFD-79kUiWEM5fPbxG35HKWHFAJ4oKSlOkSiTo2M0p2C3h-1IwleXGQkv60YjIw4tTxpIMMYlOGVHG6f1cQAQJHEoPL330bNxyts8UcPqpmQA6BXp4_I5iQcnB0M_KEz1S4SZsTYCzsSh3SEAiyxxdhpA1XAxISUIrBSWjneQACXclVVXP4JAVCdcpRV64dqBe-FffYqFzKlhyZ_VIE1WKO4sJkdBB6rxq3xXNK06Us-Bggp5bi2ckfwrI8jN6gWwhb_7o5hG8BFVaFOgFqvAAy2G95M03H0U_liy1DY4hxA2-eq1oSnfoo10erg0y2rRMekFyNVkuwzn61jH7hvad8QalQCkMTPn9IJzgGdDrWCRQaIXg-KAfsLUNmgT21bFpgzZVWRs6BcmpYWkWBJWaq62ZNtx1x92eArwwMZiNYdDt1zX6Yb56-BEuZqi1Tp0UNhm4ExIdLazZ6v56f-ZVtwtepH7OiqZG4do4gSuO4LIYdgaMyIsz6gPTfqVlto1JAodqcbw2advGDNJSw5lBOYWjaKDbRMKB0rk2_QQY2o_pvuFlKqV9WqTtsBv-_ZcLCu6B2wFs8AkgJfuX1piXfGuKR7hRNqW5WZQHerq_etBClRLuXLcWdeV1Idm-gBvmTfmvaH2sE-q4acwtnAngzcBptkFjm-u-sjVO2BXC3g0Aoc_U5gKImF7AN6suGfWyoQB3jR6zc1YapD7LTre4uE_G-r1ypfTYbSjYkGlwgLj2qPnewDo6Bgc-jd-l0wX7fkfH9Q3HsXEL-7i_bNvzkPqHC0z4OoEKpn7Vnj7bu-eJXrSmdRvc9z3afgYj3BfxOq3v8rwbqnM4Ir9md0Z7bFrxmsKvRG3vc-f7TCyt9cKmZlznqIlxMYKzGPjB5BOIJu1jl_4zXjP4PJytybguzhmwHX0e8dyKF3U4I3dmetB_Rf_Dr6n_U6f_g_gP4j-I_yD-g_gP4j-I_7X4h78r_rOvKf4TPDeyEviD-g_qP6j_oP6D-g_q_weq__R_Vf-OOP6K-kdfVP2d9geD-A_iP4j_IP6D-A_i_weK_xf_01_2ir99IMBJek9Xuv8XvLk8e3s56l--2oCR35xYmYGvRmaSzFTrwV49nCXA1o_T3A1tbPRbRvVDFUEPq-ahETccpaswXYZLMlJMcbp681kZ-7jPB39MISIocBUw5u5JHJkTzu3vCYk4O1DkNjDSgq8ypSppYOG-h5_2TGU69pMyhwHnx-ZjXInyH_gtBEMgpynU_WkWRbNolK3mGC_nOJiTJFzOI7xY0EUIWkx3OIrmOCUjTmLK5Qoky8OYszhxogcDULARW-EAgzueTKZTjOf-bjmfhgsahZMojCYz4k0DCnXkviHil2I_EivLKdZ7CYsc9i7Pi9CpRnupjQf4RKusBI8UUlPQkaW_stz_A1-F9AM">