[libcxx-commits] [libcxx] [libc++] Simplify bitset::init (PR #121357)

Mon Feb 24 15:51:22 PST 2025

================
@@ -253,26 +253,16 @@ inline _LIBCPP_CONSTEXPR __bitset<_N_words, _Size>::__bitset() _NOEXCEPT
 
 template <size_t _N_words, size_t _Size>
 void __bitset<_N_words, _Size>::__init(unsigned long long __v, false_type) _NOEXCEPT {
-  __storage_type __t[sizeof(unsigned long long) / sizeof(__storage_type)];
-  size_t __sz = _Size;
-  for (size_t __i = 0; __i < sizeof(__t) / sizeof(__t[0]); ++__i, __v >>= __bits_per_word, __sz -= __bits_per_word)
-    if (__sz < __bits_per_word)
-      __t[__i] = static_cast<__storage_type>(__v) & (1ULL << __sz) - 1;
-    else
-      __t[__i] = static_cast<__storage_type>(__v);
-
-  std::copy(__t, __t + sizeof(__t) / sizeof(__t[0]), __first_);
-  std::fill(
-      __first_ + sizeof(__t) / sizeof(__t[0]), __first_ + sizeof(__first_) / sizeof(__first_[0]), __storage_type(0));
+  const size_t __ull_words = sizeof(unsigned long long) / sizeof(__storage_type);
+  for (size_t __i = 0; __i < __ull_words; ++__i, __v >>= __bits_per_word)
+    __first_[__i] = static_cast<__storage_type>(__v);
+  std::fill(__first_ + __ull_words, __first_ + _N_words, __storage_type(0));
 }
 
 template <size_t _N_words, size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI void __bitset<_N_words, _Size>::__init(unsigned long long __v, true_type) _NOEXCEPT {
   __first_[0] = __v;
-  if (_Size < __bits_per_word)
-    __first_[0] &= (1ULL << _Size) - 1;
----------------
winner245 wrote:

Thanks for the feedback! I've run some benchmarks and, based on the results, there doesn't seem to be any performance difference between the two versions.

```
--------------------------------------------
Benchmark             Before           After
--------------------------------------------
BM_ctor_ull/1       0.228 ns        0.230 ns
BM_ctor_ull/2       0.233 ns        0.228 ns
BM_ctor_ull/3       0.231 ns        0.232 ns
BM_ctor_ull/4       0.229 ns        0.229 ns
BM_ctor_ull/5       0.231 ns        0.230 ns
BM_ctor_ull/6       0.232 ns        0.233 ns
BM_ctor_ull/7       0.230 ns        0.228 ns
BM_ctor_ull/8       0.233 ns        0.235 ns
BM_ctor_ull/9       0.232 ns        0.233 ns
BM_ctor_ull/10      0.232 ns        0.233 ns
BM_ctor_ull/11      0.235 ns        0.233 ns
BM_ctor_ull/12      0.230 ns        0.228 ns
BM_ctor_ull/13      0.231 ns        0.236 ns
BM_ctor_ull/14      0.231 ns        0.234 ns
BM_ctor_ull/15      0.232 ns        0.232 ns  
BM_ctor_ull/16      0.233 ns        0.236 ns   
BM_ctor_ull/17      0.234 ns        0.237 ns   
BM_ctor_ull/18      0.234 ns        0.236 ns   
BM_ctor_ull/19      0.235 ns        0.236 ns   
BM_ctor_ull/20      0.232 ns        0.232 ns   
BM_ctor_ull/21      0.236 ns        0.235 ns   
BM_ctor_ull/22      0.233 ns        0.230 ns   
BM_ctor_ull/23      0.234 ns        0.237 ns   
BM_ctor_ull/24      0.236 ns        0.236 ns   
BM_ctor_ull/25      0.234 ns        0.234 ns   
BM_ctor_ull/26      0.237 ns        0.235 ns   
BM_ctor_ull/27      0.233 ns        0.236 ns   
BM_ctor_ull/28      0.234 ns        0.238 ns   
BM_ctor_ull/29      0.235 ns        0.239 ns   
BM_ctor_ull/30      0.234 ns        0.240 ns   
BM_ctor_ull/31      0.235 ns        0.234 ns   
BM_ctor_ull/32      0.234 ns        0.231 ns   
BM_ctor_ull/33      0.235 ns        0.237 ns   
BM_ctor_ull/34      0.235 ns        0.234 ns   
BM_ctor_ull/35      0.235 ns        0.237 ns   
BM_ctor_ull/36      0.236 ns        0.234 ns   
BM_ctor_ull/37      0.233 ns        0.236 ns   
BM_ctor_ull/38      0.234 ns        0.235 ns   
BM_ctor_ull/39      0.233 ns        0.236 ns   
BM_ctor_ull/40      0.233 ns        0.234 ns 
BM_ctor_ull/41      0.234 ns        0.231 ns  
BM_ctor_ull/42      0.236 ns        0.233 ns  
BM_ctor_ull/43      0.233 ns        0.232 ns  
BM_ctor_ull/44      0.233 ns        0.240 ns  
BM_ctor_ull/45      0.230 ns        0.234 ns  
BM_ctor_ull/46      0.231 ns        0.235 ns 
BM_ctor_ull/47      0.233 ns        0.233 ns 
BM_ctor_ull/48      0.235 ns        0.231 ns 
BM_ctor_ull/49      0.232 ns        0.235 ns 
BM_ctor_ull/50      0.230 ns        0.235 ns 
BM_ctor_ull/51      0.234 ns        0.234 ns 
BM_ctor_ull/52      0.244 ns        0.235 ns  
BM_ctor_ull/53      0.242 ns        0.234 ns 
BM_ctor_ull/54      0.234 ns        0.236 ns
BM_ctor_ull/55      0.235 ns        0.236 ns
BM_ctor_ull/56      0.238 ns        0.234 ns
BM_ctor_ull/57      0.242 ns        0.234 ns
BM_ctor_ull/58      0.237 ns        0.236 ns
BM_ctor_ull/59      0.239 ns        0.230 ns
BM_ctor_ull/60      0.236 ns        0.236 ns
BM_ctor_ull/61      0.235 ns        0.238 ns
BM_ctor_ull/62      0.239 ns        0.232 ns
BM_ctor_ull/63      0.237 ns        0.238 ns
```
where `BM_ctor_ull/i` is the test case that calls the `unsigned long long` constructor with the argument `(1ULL << i) - 1` (a value whose lowest `i` bits are set). My understanding is that there is not much to optimize here, since the input is only a scalar value. Therefore, the main objective of this PR is to simplify the code.

https://github.com/llvm/llvm-project/pull/121357


[libcxx-commits] [libcxx] [libc++] Simplify __bitset::__init (PR #121357)

[libcxx-commits] [libcxx] [libc++] Simplify bitset::init (PR #121357)