[libcxx-commits] [libcxx] b254c2e - [libc++] Fix `uniform_int_distribution` for 128-bit result type

Wed Dec 1 08:03:45 PST 2021

Author: Fabian Wolff
Date: 2021-12-01T11:03:29-05:00
New Revision: b254c2e2c4aa3298655282b8db37860129bcd7b6

URL: https://github.com/llvm/llvm-project/commit/b254c2e2c4aa3298655282b8db37860129bcd7b6
DIFF: https://github.com/llvm/llvm-project/commit/b254c2e2c4aa3298655282b8db37860129bcd7b6.diff

LOG: [libc++] Fix `uniform_int_distribution` for 128-bit result type

Fixes https://llvm.org/PR51520. The problem is that `uniform_int_distribution`
currently uses an unsigned integer with at most 64 bits internally, which
is then casted to the desired result type. If the result type is `int64_t`,
this will produce a negative number if the most significant bit is set,
but if the result type is `__int128_t`, the value remains non-negative
and will be out of bounds for the example in PR#51520. (The reason why
it also seems to work if the upper or lower bound is changed is
because the branch at [1] will then no longer be taken, and proper
rejection sampling takes place.)

The bigger issue here is probably that `uniform_int_distribution` can be
instantiated with `__int128_t` but will silently produce incorrect results
(only the lowest 64 bits can ever be set). libstdc++ also supports `__int128_t`
as a result type, so I have simply extended the maximum width of the
internal intermediate result type.

[1]: https://github.com/llvm/llvm-project/blob/6d28dffb6/libcxx/include/__random/uniform_int_distribution.h#L266-L267

Differential Revision: https://reviews.llvm.org/D114129

Added: 
    libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp

Modified: 
    libcxx/include/__random/log2.h
    libcxx/include/__random/uniform_int_distribution.h

Removed: 
    


################################################################################
diff  --git a/libcxx/include/__random/log2.h b/libcxx/include/__random/log2.h
index c2d9b6b69b272..3d9640c1f7871 100644

--- a/libcxx/include/__random/log2.h
+++ b/libcxx/include/__random/log2.h
@@ -11,6 +11,7 @@
 
 #include <__config>
 #include <cstddef>
+#include <type_traits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
@@ -18,30 +19,54 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _UIntType, _UIntType _Xp, size_t _Rp>
+struct __log2_imp;
+
 template <unsigned long long _Xp, size_t _Rp>
-struct __log2_imp
+struct __log2_imp<unsigned long long, _Xp, _Rp>
 {
     static const size_t value = _Xp & ((unsigned long long)(1) << _Rp) ? _Rp
-                                           : __log2_imp<_Xp, _Rp - 1>::value;
+                                           : __log2_imp<unsigned long long, _Xp, _Rp - 1>::value;
 };
 
 template <unsigned long long _Xp>
-struct __log2_imp<_Xp, 0>
+struct __log2_imp<unsigned long long, _Xp, 0>
 {
     static const size_t value = 0;
 };
 
 template <size_t _Rp>
-struct __log2_imp<0, _Rp>
+struct __log2_imp<unsigned long long, 0, _Rp>
 {
     static const size_t value = _Rp + 1;
 };
 
+#ifndef _LIBCPP_HAS_NO_INT128
+
+template <__uint128_t _Xp, size_t _Rp>
+struct __log2_imp<__uint128_t, _Xp, _Rp>
+{
+    static const size_t value = (_Xp >> 64)
+        ? (64 + __log2_imp<unsigned long long, (_Xp >> 64), 63>::value)
+        : __log2_imp<unsigned long long, _Xp, 63>::value;
+};
+
+#endif // _LIBCPP_HAS_NO_INT128
+
 template <class _UIntType, _UIntType _Xp>
 struct __log2
 {
-    static const size_t value = __log2_imp<_Xp,
-                                         sizeof(_UIntType) * __CHAR_BIT__ - 1>::value;
+    static const size_t value = __log2_imp<
+#ifndef _LIBCPP_HAS_NO_INT128
+        typename conditional<
+                sizeof(_UIntType) <= sizeof(unsigned long long),
+                    unsigned long long,
+                    __uint128_t
+            >::type,
+#else
+        unsigned long long,
+#endif // _LIBCPP_HAS_NO_INT128
+        _Xp, sizeof(_UIntType) * __CHAR_BIT__ - 1>::value;
 };
 
 _LIBCPP_END_NAMESPACE_STD

diff  --git a/libcxx/include/__random/uniform_int_distribution.h b/libcxx/include/__random/uniform_int_distribution.h
index 3f47d5f4707dc..55b4761637f05 100644
--- a/libcxx/include/__random/uniform_int_distribution.h
+++ b/libcxx/include/__random/uniform_int_distribution.h
@@ -12,6 +12,7 @@
 #include <__bits>
 #include <__config>
 #include <__random/log2.h>
+#include <bit>
 #include <cstddef>
 #include <cstdint>
 #include <iosfwd>
@@ -154,7 +155,7 @@ __independent_bits_engine<_Engine, _UIntType>::__eval(true_type)
     return _Sp;
 }
 
-template<class _IntType = int>
+template<class _IntType = int> // __int128_t is also supported as an extension here
 class uniform_int_distribution
 {
 public:
@@ -229,8 +230,8 @@ typename uniform_int_distribution<_IntType>::result_type
 uniform_int_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p)
 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 {
-    typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t),
-                                            uint32_t, uint64_t>::type _UIntType;
+    typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t), uint32_t,
+                                 typename make_unsigned<result_type>::type>::type _UIntType;
     const _UIntType _Rp = _UIntType(__p.b()) - _UIntType(__p.a()) + _UIntType(1);
     if (_Rp == 1)
         return __p.a();
@@ -238,7 +239,7 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
     typedef __independent_bits_engine<_URNG, _UIntType> _Eng;
     if (_Rp == 0)
         return static_cast<result_type>(_Eng(__g, _Dt)());
-    size_t __w = _Dt - __libcpp_clz(_Rp) - 1;
+    size_t __w = _Dt - __countl_zero(_Rp) - 1;
     if ((_Rp & (numeric_limits<_UIntType>::max() >> (_Dt - __w))) != 0)
         ++__w;
     _Eng __e(__g, __w);

diff  --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp
new file mode 100644
index 0000000000000..ab8bda7058c3c
--- /dev/null
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: c++03
+
+// <random>
+
+// template<class _IntType = int>
+// class uniform_int_distribution
+
+// template<class _URNG> result_type operator()(_URNG& g);
+
+#include <random>
+#include <cassert>
+
+#include "test_macros.h"
+
+int main(int, char**) {
+
+#ifndef _LIBCPP_HAS_NO_INT128
+
+  // Test that values outside of the 64-bit range can be produced.
+  {
+    std::minstd_rand0 e;
+    std::uniform_int_distribution<__int128_t> d;
+    assert(d.min() == 0 && d.max() == std::numeric_limits<__int128_t>::max());
+    bool all_in_64bit_range = true;
+    for (int i = 0; i < 100; ++i) {
+      __int128_t n = d(e);
+      all_in_64bit_range = all_in_64bit_range && (n <= UINT64_MAX);
+    }
+    assert(!all_in_64bit_range);
+  }
+
+  // Same test as above with min/max set and outside the 64-bit range.
+  {
+    __int128_t a = ((__int128_t)INT64_MIN) * 10;
+    __int128_t b = ((__int128_t)INT64_MAX) * 10;
+    std::minstd_rand0 e;
+    std::uniform_int_distribution<__int128_t> d(a, b);
+    assert(d.min() == a && d.max() == b);
+    bool all_in_64bit_range = true;
+    for (int i = 0; i < 100; ++i) {
+      __int128_t n = d(e);
+      assert(a <= n && n <= b);
+      all_in_64bit_range = all_in_64bit_range && (INT64_MIN <= n) && (n <= (INT64_MAX));
+    }
+    assert(!all_in_64bit_range);
+  }
+
+  // Same test as above with __uint128_t.
+  {
+    __uint128_t a = UINT64_MAX / 3;
+    __uint128_t b = ((__uint128_t)UINT64_MAX) * 10;
+    std::minstd_rand0 e;
+    std::uniform_int_distribution<__uint128_t> d(a, b);
+    assert(d.min() == a && d.max() == b);
+    bool all_in_64bit_range = true;
+    for (int i = 0; i < 100; ++i) {
+      __uint128_t n = d(e);
+      assert(a <= n && n <= b);
+      all_in_64bit_range = all_in_64bit_range && (n <= (UINT64_MAX));
+    }
+    assert(!all_in_64bit_range);
+  }
+
+  // Regression test for PR#51520:
+  {
+    std::minstd_rand0 e;
+    std::uniform_int_distribution<__int128_t> d(INT64_MIN, INT64_MAX);
+    assert(d.min() == INT64_MIN && d.max() == INT64_MAX);
+    for (int i = 0; i < 100; ++i) {
+      __int128_t n = d(e);
+      assert((INT64_MIN <= n) && (n <= INT64_MAX));
+    }
+  }
+
+#endif // _LIBCPP_HAS_NO_INT128
+
+  return 0;
+}