[libcxx-commits] [libcxx] [libc++] Fix possible out of range access in bitset (PR #121348)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat Mar 15 19:31:31 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Peng Liu (winner245)

<details>
<summary>Changes</summary>

According to the C++ standard, the minimum bit widths for `std::size_t`, `unsigned long`, and `unsigned long long` are as follows:

| Type              | `std::size_t` | `unsigned long` | `unsigned long long` |
|-------------------|---------------|-----------------|----------------------|
| **Minimum bit width** | 16            | 32              | 64                   |

Given the above specification, it is possible for `sizeof(unsigned long long) / sizeof(std::size_t) > 2` and `sizeof(unsigned long) / sizeof(std::size_t) > 1` on certain platforms, particularly 16-bit embedded systems. However, the current implementations of the `unsigned long long` constructor of `std::bitset` and of its `to_ullong`/`to_ulong` conversion functions are not valid on such platforms. This PR addresses these limitations.

#### to_ullong:

https://github.com/llvm/llvm-project/blob/b7637a855722b608ce2fb5aa860149db9b881197/libcxx/include/bitset#L384-L385

This implementation shifts each word by the same amount, `__bits_per_word` (`== sizeof(__storage_type) * CHAR_BIT`), which is only correct when the loop runs a single iteration with `__i == 1`. If the loop count is larger, e.g., when `sizeof(unsigned long long) / sizeof(std::size_t) > 2`, the `__i`-th word must be shifted by `__i * __bits_per_word` bits for `__i >= 2`, rather than by the fixed amount `__bits_per_word`, to ensure correct concatenation of multiple words. This is fixed in this PR.


#### to_ulong:

https://github.com/llvm/llvm-project/blob/b7637a855722b608ce2fb5aa860149db9b881197/libcxx/include/bitset#L527-L530

The `to_ulong()` function in the one-word specialization `__bitset<1, _Size>` is not standard-conforming, as it unconditionally returns `__first_`, whereas the standard requires it to throw `std::overflow_error` if the value cannot fit within `unsigned long` (which can happen when `sizeof(size_t) > sizeof(unsigned long)`). On LLP64 (e.g., Windows and MinGW), `size_t` is 64 bits wide while `unsigned long` is only 32 bits wide (see https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models), and in this case we should throw `std::overflow_error` when the stored value does not fit. This is fixed in this PR.

Additionally, the `to_ulong` function in `__bitset<_N_words, _Size>` throws `std::overflow_error` as expected when `sizeof(size_t) > sizeof(unsigned long)`. However, in the non-throwing case, it unconditionally returns the single word `__first_[0]`. This is correct only when `sizeof(unsigned long) / sizeof(std::size_t) <= 1`. If, on certain platforms, `sizeof(unsigned long) / sizeof(std::size_t) == 2`, we need to concatenate the first two words as in `to_ullong`. This is addressed in this PR.

#### __bitset(unsigned long long __v):

This constructor currently only considers `sizeof(unsigned long long) / sizeof(std::size_t) <= 2`, which yields incorrect concatenations on platforms with `sizeof(unsigned long long) / sizeof(std::size_t) > 2`. 


Besides the above fixes, we also renamed some private member functions to use `__uglified_name`s (e.g., the private overloads of `to_ulong` and `to_ullong` are renamed to `__to_ulong` and `__to_ullong`). Also, some missing header files are added. 



---
Full diff: https://github.com/llvm/llvm-project/pull/121348.diff


1 Files Affected:

- (modified) libcxx/include/bitset (+91-45) 


``````````diff
diff --git a/libcxx/include/bitset b/libcxx/include/bitset
index f905b6f274e3f..a7de7ebccb1f6 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -136,6 +136,7 @@ template <size_t N> struct hash<std::bitset<N>>;
 #  include <__algorithm/fill.h>
 #  include <__algorithm/fill_n.h>
 #  include <__algorithm/find.h>
+#  include <__algorithm/min.h>
 #  include <__assert>
 #  include <__bit_reference>
 #  include <__config>
@@ -143,6 +144,7 @@ template <size_t N> struct hash<std::bitset<N>>;
 #  include <__cstddef/size_t.h>
 #  include <__functional/hash.h>
 #  include <__functional/unary_function.h>
+#  include <__type_traits/integral_constant.h>
 #  include <__type_traits/is_char_like_type.h>
 #  include <climits>
 #  include <stdexcept>
@@ -218,10 +220,10 @@ protected:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void flip() _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const {
-    return to_ulong(integral_constant < bool, _Size< sizeof(unsigned long) * CHAR_BIT>());
+    return __to_ulong(_BoolConstant < _Size< sizeof(unsigned long) * CHAR_BIT>());
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const {
-    return to_ullong(integral_constant < bool, _Size< sizeof(unsigned long long) * CHAR_BIT>());
+    return __to_ullong(_BoolConstant < _Size< sizeof(unsigned long long) * CHAR_BIT>());
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT;
@@ -233,12 +235,14 @@ private:
   void __init(unsigned long long __v, false_type) _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI void __init(unsigned long long __v, true_type) _NOEXCEPT;
 #  endif // _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong(false_type) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong(true_type) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong(false_type) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong(true_type) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong(true_type, false_type) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong(true_type, true_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(false_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(true_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(true_type, false_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(true_type, true_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long __to_ullong(false_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long __to_ullong(true_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long __to_ullong(true_type, false_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long __to_ullong(true_type, true_type) const;
 };
 
 template <size_t _N_words, size_t _Size>
@@ -283,21 +287,30 @@ inline _LIBCPP_HIDE_FROM_ABI void __bitset<_N_words, _Size>::__init(unsigned lon
 template <size_t _N_words, size_t _Size>
 inline _LIBCPP_CONSTEXPR __bitset<_N_words, _Size>::__bitset(unsigned long long __v) _NOEXCEPT
 #  ifndef _LIBCPP_CXX03_LANG
-#    if __SIZEOF_SIZE_T__ == 8
-    : __first_{__v}
-#    elif __SIZEOF_SIZE_T__ == 4
+#    if (__SIZEOF_LONG_LONG__ + __SIZEOF_SIZE_T__ - 1) / __SIZEOF_SIZE_T__ == 1
+    : __first_{static_cast<__storage_type>(__v)}
+#    elif (__SIZEOF_LONG_LONG__ + __SIZEOF_SIZE_T__ - 1) / __SIZEOF_SIZE_T__ == 2
+    : __first_{static_cast<__storage_type>(__v), static_cast<__storage_type>(__v >> __bits_per_word)}
+#    elif (__SIZEOF_LONG_LONG__ + __SIZEOF_SIZE_T__ - 1) / __SIZEOF_SIZE_T__ == 4
+#      if _N_words == 2
+    : __first_{static_cast<__storage_type>(__v), static_cast<__storage_type>(__v >> __bits_per_word)}
+#      elif _N_words == 3
     : __first_{static_cast<__storage_type>(__v),
-               _Size >= 2 * __bits_per_word
-                   ? static_cast<__storage_type>(__v >> __bits_per_word)
-                   : static_cast<__storage_type>((__v >> __bits_per_word) &
-                                                 (__storage_type(1) << (_Size - __bits_per_word)) - 1)}
+               static_cast<__storage_type>(__v >> __bits_per_word),
+               static_cast<__storage_type>(__v >> (__bits_per_word * 2))}
+#      else
+    : __first_{static_cast<__storage_type>(__v),
+               static_cast<__storage_type>(__v >> __bits_per_word),
+               static_cast<__storage_type>(__v >> (__bits_per_word * 2)),
+               static_cast<__storage_type>(__v >> (__bits_per_word * 3))}
+#      endif
 #    else
 #      error This constructor has not been ported to this platform
 #    endif
 #  endif
 {
 #  ifdef _LIBCPP_CXX03_LANG
-  __init(__v, integral_constant<bool, sizeof(unsigned long long) == sizeof(__storage_type)>());
+  __init(__v, _BoolConstant<sizeof(unsigned long long) == sizeof(__storage_type)>());
 #  endif
 }
 
@@ -338,52 +351,68 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void __bitset<_N_words, _Siz
 
 template <size_t _N_words, size_t _Size>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long
-__bitset<_N_words, _Size>::to_ulong(false_type) const {
-  __const_iterator __e = __make_iter(_Size);
-  __const_iterator __i = std::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true);
-  if (__i != __e)
-    std::__throw_overflow_error("bitset to_ulong overflow error");
+__bitset<_N_words, _Size>::__to_ulong(false_type) const {
+  if (auto __e = __make_iter(_Size); std::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true) != __e)
+    std::__throw_overflow_error("bitset __to_ulong overflow error");
+
+  return __to_ulong(true_type());
+}
 
-  return __first_[0];
+template <size_t _N_words, size_t _Size>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long
+__bitset<_N_words, _Size>::__to_ulong(true_type) const {
+  return __to_ulong(true_type(), _BoolConstant<sizeof(__storage_type) < sizeof(unsigned long)>());
 }
 
 template <size_t _N_words, size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long
-__bitset<_N_words, _Size>::to_ulong(true_type) const {
-  return __first_[0];
+__bitset<_N_words, _Size>::__to_ulong(true_type, false_type) const {
+  return static_cast<unsigned long>(__first_[0]);
+}
+
+template <size_t _N_words, size_t _Size>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long
+__bitset<_N_words, _Size>::__to_ulong(true_type, true_type) const {
+  unsigned long __r = static_cast<unsigned long>(__first_[0]);
+  _LIBCPP_DIAGNOSTIC_PUSH
+  _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshift-count-overflow")
+  for (size_t __i = 1; __i < _N_words; ++__i)
+    __r |= static_cast<unsigned long>(__first_[__i]) << (__bits_per_word * __i);
+  _LIBCPP_DIAGNOSTIC_POP
+  return __r;
 }
 
 template <size_t _N_words, size_t _Size>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long
-__bitset<_N_words, _Size>::to_ullong(false_type) const {
-  __const_iterator __e = __make_iter(_Size);
-  __const_iterator __i = std::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true);
-  if (__i != __e)
-    std::__throw_overflow_error("bitset to_ullong overflow error");
+__bitset<_N_words, _Size>::__to_ullong(false_type) const {
+  if (auto __e = __make_iter(_Size); std::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true) != __e)
+    std::__throw_overflow_error("bitset __to_ullong overflow error");
 
-  return to_ullong(true_type());
+  return __to_ullong(true_type());
 }
 
 template <size_t _N_words, size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long
-__bitset<_N_words, _Size>::to_ullong(true_type) const {
-  return to_ullong(true_type(), integral_constant<bool, sizeof(__storage_type) < sizeof(unsigned long long)>());
+__bitset<_N_words, _Size>::__to_ullong(true_type) const {
+  return __to_ullong(true_type(), _BoolConstant<sizeof(__storage_type) < sizeof(unsigned long long)>());
 }
 
 template <size_t _N_words, size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long
-__bitset<_N_words, _Size>::to_ullong(true_type, false_type) const {
-  return __first_[0];
+__bitset<_N_words, _Size>::__to_ullong(true_type, false_type) const {
+  return static_cast<unsigned long long>(__first_[0]);
 }
 
 template <size_t _N_words, size_t _Size>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long
-__bitset<_N_words, _Size>::to_ullong(true_type, true_type) const {
-  unsigned long long __r = __first_[0];
+__bitset<_N_words, _Size>::__to_ullong(true_type, true_type) const {
+  unsigned long long __r = static_cast<unsigned long long>(__first_[0]);
   _LIBCPP_DIAGNOSTIC_PUSH
   _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshift-count-overflow")
-  for (size_t __i = 1; __i < sizeof(unsigned long long) / sizeof(__storage_type); ++__i)
-    __r |= static_cast<unsigned long long>(__first_[__i]) << (sizeof(__storage_type) * CHAR_BIT);
+  const size_t __ull_wrods = (sizeof(unsigned long long) - 1) / sizeof(__storage_type) + 1;
+  const size_t __n_words   = _N_words < __ull_wrods ? _N_words : __ull_wrods;
+  for (size_t __i = 1; __i < __n_words; ++__i)
+    __r |= static_cast<unsigned long long>(__first_[__i]) << (__bits_per_word * __i);
   _LIBCPP_DIAGNOSTIC_POP
   return __r;
 }
@@ -483,6 +512,10 @@ protected:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT;
 
   _LIBCPP_HIDE_FROM_ABI size_t __hash_code() const _NOEXCEPT;
+
+private:
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(false_type) const;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __to_ulong(true_type) const;
 };
 
 template <size_t _Size>
@@ -490,8 +523,7 @@ inline _LIBCPP_CONSTEXPR __bitset<1, _Size>::__bitset() _NOEXCEPT : __first_(0)
 
 template <size_t _Size>
 inline _LIBCPP_CONSTEXPR __bitset<1, _Size>::__bitset(unsigned long long __v) _NOEXCEPT
-    : __first_(_Size == __bits_per_word ? static_cast<__storage_type>(__v)
-                                        : static_cast<__storage_type>(__v) & ((__storage_type(1) << _Size) - 1)) {}
+    : __first_(_Size == __bits_per_word ? static_cast<__storage_type>(__v) : static_cast<__storage_type>(__v)) {}
 
 template <size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void
@@ -518,12 +550,25 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void __bitset<1, _Siz
 
 template <size_t _Size>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __bitset<1, _Size>::to_ulong() const {
-  return __first_;
+  return __to_ulong(_BoolConstant < _Size< sizeof(unsigned long) * CHAR_BIT>());
+}
+
+template <size_t _Size>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __bitset<1, _Size>::__to_ulong(false_type) const {
+  if (auto __e = __make_iter(_Size); std::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true) != __e)
+    __throw_overflow_error("__bitset<1, _Size>::__to_ulong overflow error");
+
+  return static_cast<unsigned long>(__first_);
+}
+
+template <size_t _Size>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long __bitset<1, _Size>::__to_ulong(true_type) const {
+  return static_cast<unsigned long>(__first_);
 }
 
 template <size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long __bitset<1, _Size>::to_ullong() const {
-  return __first_;
+  return static_cast<unsigned long long>(__first_);
 }
 
 template <size_t _Size>
@@ -587,8 +632,8 @@ protected:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void flip() _NOEXCEPT {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const { return 0; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const { return 0; }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const { return 0UL; }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const { return 0ULL; }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT { return true; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT { return false; }
@@ -618,7 +663,8 @@ public:
 
   // 23.3.5.1 constructors:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bitset() _NOEXCEPT {}
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bitset(unsigned long long __v) _NOEXCEPT : __base(__v) {}
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bitset(unsigned long long __v) _NOEXCEPT
+      : __base(sizeof(unsigned long long) * CHAR_BIT <= _Size ? __v : __v & ((1ULL << _Size) - 1)) {}
   template <class _CharT, __enable_if_t<_IsCharLikeType<_CharT>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 explicit bitset(
       const _CharT* __str,

``````````

</details>


https://github.com/llvm/llvm-project/pull/121348


More information about the libcxx-commits mailing list