[libcxx-commits] [libcxx] [libc++] Add std::hash support for wide _BitInt types (PR #193194)

Xavier Roche via libcxx-commits libcxx-commits at lists.llvm.org
Fri Jun 5 06:43:11 PDT 2026


https://github.com/xroche updated https://github.com/llvm/llvm-project/pull/193194

>From fa5864b8b8c03b44d47244906f97ddf62ad9aec3 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:18:20 +0200
Subject: [PATCH 1/3] [libc++] Add std::hash support for wide _BitInt types

The existing __scalar_hash specializations only cover sizeof(T) up to
4 * sizeof(size_t), so std::hash<unsigned _BitInt(N)> failed to compile
for any N where sizeof exceeds that (N > 256 on 64-bit). Make the
primary template a defined fallback that hashes sizeof(T) raw bytes via
std::__hash_memory; the existing 0..4-word specializations still take
precedence and keep their current ABI.

Part of the _BitInt(N) libc++ effort:
https://discourse.llvm.org/t/bitint-n-support-in-libc-investigations-possible-improvements-looking-for-guidance/90063

Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 libcxx/include/__functional/hash.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index c794f57356ae7..ae7e0ddf82ed5 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -258,8 +258,19 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __si
 }
 #endif
 
+// Primary template covers any width not handled by the explicit
+// specializations below (in particular _BitInt(N) with sizeof > 4 * sizeof(size_t)).
 template <class _Tp, size_t = sizeof(_Tp) / sizeof(size_t)>
-struct __scalar_hash;
+struct __scalar_hash : public __unary_function<_Tp, size_t> {
+  _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT {
+    union {
+      _Tp __t;
+      char __bytes[sizeof(_Tp)];
+    } __u;
+    __u.__t = __v;
+    return std::__hash_memory(std::addressof(__u), sizeof(__u));
+  }
+};
 
 template <class _Tp>
 struct __scalar_hash<_Tp, 0> : public __unary_function<_Tp, size_t> {

>From eebff5772d3392c7b663947e256febaf9a988170 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:19:26 +0200
Subject: [PATCH 2/3] [libc++][test] Cover std::hash for _BitInt(N)

Test that std::hash<_BitInt(N)> exists, is noexcept, and respects the
hash-equality invariant for signed and unsigned widths from 7 up to
4096, with each wider group gated on __BITINT_MAXWIDTH__. On 64-bit
targets, widths above 256 exercise the new primary __scalar_hash
template. The sanity check on distinct small values guards against a
regression where the hash would reflect only padding or a fixed prefix.

Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 .../unord.hash/integral.bitint.pass.cpp       | 149 ++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp

diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
new file mode 100644
index 0000000000000..e2f9d40e0e29f
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
@@ -0,0 +1,149 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <functional>
+
+// std::hash<_BitInt(N)>
+
+#include <cassert>
+#include <cstddef>
+#include <functional>
+#include <type_traits>
+
+#include "test_macros.h"
+
+#if TEST_HAS_EXTENSION(bit_int)
+
+template <class T>
+void test_basic() {
+  using H = std::hash<T>;
+  ASSERT_NOEXCEPT(H{}(T{}));
+  static_assert(std::is_same_v<decltype(H{}(T{})), std::size_t>);
+
+  // Same value -> same hash, every time.
+  H h;
+  for (int v = 0; v <= 16; ++v) {
+    T t1(static_cast<T>(v));
+    T t2(static_cast<T>(v));
+    assert(h(t1) == h(t2));
+  }
+}
+
+// Distinct value bits should not collide trivially. The hash function is
+// allowed to collide, but adjacent small integers should not all hash to
+// the same value -- that would be a sign that the value was not being
+// included in the hash at all (e.g. hashing only padding).
+template <class T>
+void test_distinct_values_distinct_hashes() {
+  std::hash<T> h;
+  std::size_t hashes[16];
+  for (int i = 0; i < 16; ++i)
+    hashes[i] = h(static_cast<T>(i));
+  // At least 8 of 16 must be unique. (Any reasonable hash yields 16 distinct
+  // values here; the threshold is deliberately loose to allow exotic hashes.)
+  int unique = 0;
+  for (int i = 0; i < 16; ++i) {
+    bool seen = false;
+    for (int j = 0; j < i; ++j)
+      if (hashes[i] == hashes[j])
+        seen = true;
+    if (!seen)
+      ++unique;
+  }
+  assert(unique >= 8);
+}
+
+// The standard guarantees `a == b` implies `hash(a) == hash(b)`. For
+// _BitInt(N), two values that compare equal might be reached via different
+// expression chains. Converting from unsigned long long (truncating when
+// N < 64, widening when N > 64) must not change the hash.
+template <class T>
+void test_equal_values_same_hash_via_different_paths() {
+  std::hash<T> h;
+  T direct = static_cast<T>(42);
+  T via_wider;
+  {
+    using U64 = unsigned long long;
+    U64 wide  = 42;
+    via_wider = static_cast<T>(wide);
+  }
+  assert(direct == via_wider);
+  assert(h(direct) == h(via_wider));
+
+  // Bitwise-NOT of zero is the unsigned all-ones value. For the maximum
+  // representable value, two paths must agree.
+  if constexpr (!std::is_signed_v<T>) {
+    T max1 = static_cast<T>(~T(0));
+    T max2 = T(0);
+    for (int i = 0; i < (int)(sizeof(T) * 8); ++i)
+      max2 = static_cast<T>((max2 << 1) | T(1));
+    assert(h(max1) == h(max2));
+  }
+}
+
+template <class T>
+void test_all() {
+  test_basic<T>();
+  test_distinct_values_distinct_hashes<T>();
+  test_equal_values_same_hash_via_different_paths<T>();
+}
+
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+int main(int, char**) {
+#if TEST_HAS_EXTENSION(bit_int)
+  // Byte-aligned widths.
+  test_all<unsigned _BitInt(8)>();
+  test_all<signed _BitInt(8)>();
+  test_all<unsigned _BitInt(32)>();
+  test_all<signed _BitInt(32)>();
+  test_all<unsigned _BitInt(64)>();
+  test_all<signed _BitInt(64)>();
+
+  // Non-byte-aligned widths -- exercise types whose sizeof * CHAR_BIT
+  // exceeds the value-bit count.
+  test_all<unsigned _BitInt(7)>();
+  test_all<signed _BitInt(7)>();
+  test_all<unsigned _BitInt(13)>();
+  test_all<signed _BitInt(13)>();
+  test_all<unsigned _BitInt(37)>();
+  test_all<signed _BitInt(37)>();
+
+#  if __BITINT_MAXWIDTH__ >= 128
+  test_all<unsigned _BitInt(77)>();
+  test_all<signed _BitInt(77)>();
+  test_all<unsigned _BitInt(128)>();
+  test_all<signed _BitInt(128)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 256
+  test_all<unsigned _BitInt(129)>();
+  test_all<signed _BitInt(129)>();
+  test_all<unsigned _BitInt(255)>();
+  test_all<signed _BitInt(255)>();
+  test_all<unsigned _BitInt(256)>();
+  test_all<signed _BitInt(256)>();
+#  endif
+  // Widths that exercise the new primary template (sizeof > 4 * sizeof(size_t)).
+#  if __BITINT_MAXWIDTH__ >= 257
+  test_all<unsigned _BitInt(257)>();
+  test_all<signed _BitInt(257)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 1024
+  test_all<unsigned _BitInt(1024)>();
+  test_all<signed _BitInt(1024)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 4096
+  test_all<unsigned _BitInt(4096)>();
+  test_all<signed _BitInt(4096)>();
+#  endif
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+  return 0;
+}

>From 85de19800ed3ad0ebd121dc90ec61107349c7473 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Fri, 5 Jun 2026 15:42:46 +0200
Subject: [PATCH 3/3] [libc++] Strengthen std::hash _BitInt(N) padding-bit
 equivalence test

The standard requires `a == b => hash(a) == hash(b)` for any T that
std::hash supports. For _BitInt(N) whose storage is wider than its
value (sizeof(T) * CHAR_BIT > N), storage carries padding bits whose
value is unspecified; two equal values may have different storage byte
patterns.

The existing test_equal_values_same_hash_via_different_paths
constructs equal values via two cast chains, but both chains route
through the same _BitInt ABI normalization, so they produce
byte-identical storage by construction and never probe the actual
invariant.

Add test_padding_bits_dont_break_equivalence which memcpy-copies a
value, flips every padding-bit position in the byte representation,
and asserts both `a == b` and `hash(a) == hash(b)`. This directly
probes the spec invariant against the adversarial case.

Document the by-value parameter dependency in `__scalar_hash`:
switching to a reference parameter, or a future ABI revision that
left padding unspecified across calls, would silently break the
invariant.

Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 libcxx/include/__functional/hash.h            | 11 ++++++-
 .../unord.hash/integral.bitint.pass.cpp       | 29 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index ae7e0ddf82ed5..268cd7a7011df 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -259,7 +259,16 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __si
 #endif
 
 // Primary template covers any width not handled by the explicit
-// specializations below (in particular _BitInt(N) with sizeof > 4 * sizeof(size_t)).
+// specializations below, in particular _BitInt(N) with sizeof > 4 * sizeof(size_t).
+//
+// _BitInt(N) has padding bits in storage whenever sizeof(_Tp) * CHAR_BIT
+// exceeds N. The by-value parameter `__v` normalizes those bits via Clang's
+// _BitInt ABI (zero-extend unsigned, sign-extend signed), so the bytes
+// hashed below are deterministic per value, not per source storage. A
+// future ABI change leaving padding unspecified across calls, or a switch
+// to a reference parameter that skips the normalization, would break
+// `a == b => hash(a) == hash(b)`. See test_padding_bits_dont_break_equivalence
+// in integral.bitint.pass.cpp.
 template <class _Tp, size_t = sizeof(_Tp) / sizeof(size_t)>
 struct __scalar_hash : public __unary_function<_Tp, size_t> {
   _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT {
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
index e2f9d40e0e29f..afa67dafd0b94 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
@@ -14,7 +14,9 @@
 
 #include <cassert>
 #include <cstddef>
+#include <cstring>
 #include <functional>
+#include <limits>
 #include <type_traits>
 
 #include "test_macros.h"
@@ -88,11 +90,38 @@ void test_equal_values_same_hash_via_different_paths() {
   }
 }
 
+// Inject different bits at the padding-bit storage positions of two
+// otherwise-equal values, then confirm `a == b` and `hash(a) == hash(b)`.
+// Probes the spec invariant `a == b => hash(a) == hash(b)` directly,
+// rather than relying on the construction path to produce identical
+// byte patterns. Only applies when sizeof(T) * CHAR_BIT > digits +
+// is_signed, i.e. when storage carries padding bits.
+template <class T>
+void test_padding_bits_dont_break_equivalence() {
+  constexpr int value_bits   = std::numeric_limits<T>::digits + std::is_signed_v<T>;
+  constexpr int storage_bits = static_cast<int>(sizeof(T)) * 8;
+  if constexpr (storage_bits > value_bits) {
+    std::hash<T> h;
+    T clean(static_cast<T>(5));
+    T dirty;
+    std::memcpy(&dirty, &clean, sizeof(T));
+    // Flip every padding-bit position in the byte representation. The
+    // bit-index -> byte mapping below assumes a little-endian _BitInt layout,
+    // where padding occupies the high bits of the highest storage bytes.
+    auto* bytes = reinterpret_cast<unsigned char*>(&dirty);
+    for (int i = value_bits; i < storage_bits; ++i)
+      bytes[i / 8] ^= static_cast<unsigned char>(1u << (i % 8));
+    assert(clean == dirty);
+    assert(h(clean) == h(dirty));
+  }
+}
+
 template <class T>
 void test_all() {
   test_basic<T>();
   test_distinct_values_distinct_hashes<T>();
   test_equal_values_same_hash_via_different_paths<T>();
+  test_padding_bits_dont_break_equivalence<T>();
 }
 
 #endif // TEST_HAS_EXTENSION(bit_int)



More information about the libcxx-commits mailing list