[libcxx-commits] [libcxx] [libc++] Add std::hash support for wide _BitInt types (PR #193194)
Xavier Roche via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Jun 5 06:43:11 PDT 2026
https://github.com/xroche updated https://github.com/llvm/llvm-project/pull/193194
>From fa5864b8b8c03b44d47244906f97ddf62ad9aec3 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:18:20 +0200
Subject: [PATCH 1/3] [libc++] Add std::hash support for wide _BitInt types
The existing __scalar_hash specializations only cover sizeof(T) up to
4 * sizeof(size_t), so std::hash<unsigned _BitInt(N)> failed to compile
for any N where sizeof exceeds that (N > 256 on 64-bit). Make the
primary template a defined fallback that hashes sizeof(T) raw bytes via
std::__hash_memory; the existing 0..4-word specializations still take
precedence and keep their current ABI.
Part of the _BitInt(N) libc++ effort:
https://discourse.llvm.org/t/bitint-n-support-in-libc-investigations-possible-improvements-looking-for-guidance/90063
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
libcxx/include/__functional/hash.h | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index c794f57356ae7..ae7e0ddf82ed5 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -258,8 +258,19 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __si
}
#endif
+// Primary template covers any width not handled by the explicit
+// specializations below (in particular _BitInt(N) with sizeof > 4 * sizeof(size_t)).
template <class _Tp, size_t = sizeof(_Tp) / sizeof(size_t)>
-struct __scalar_hash;
+struct __scalar_hash : public __unary_function<_Tp, size_t> {
+ _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT {
+ union {
+ _Tp __t;
+ char __bytes[sizeof(_Tp)];
+ } __u;
+ __u.__t = __v;
+ return std::__hash_memory(std::addressof(__u), sizeof(__u));
+ }
+};
template <class _Tp>
struct __scalar_hash<_Tp, 0> : public __unary_function<_Tp, size_t> {
>From eebff5772d3392c7b663947e256febaf9a988170 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:19:26 +0200
Subject: [PATCH 2/3] [libc++][test] Cover std::hash for _BitInt(N)
Test that std::hash<_BitInt(N)> exists, is noexcept, and respects the
hash-equality invariant across signed and unsigned widths from 7 up to
__BITINT_MAXWIDTH__. Widths above 256 exercise the new primary
__scalar_hash template. The sanity check on distinct small values
guards against a regression where the hash would reflect only padding
or a fixed prefix.
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
.../unord.hash/integral.bitint.pass.cpp | 149 ++++++++++++++++++
1 file changed, 149 insertions(+)
create mode 100644 libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
new file mode 100644
index 0000000000000..e2f9d40e0e29f
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
@@ -0,0 +1,149 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <functional>
+
+// std::hash<_BitInt(N)>
+
+#include <cassert>
+#include <cstddef>
+#include <functional>
+#include <type_traits>
+
+#include "test_macros.h"
+
+#if TEST_HAS_EXTENSION(bit_int)
+
+template <class T>
+void test_basic() {
+ using H = std::hash<T>;
+ ASSERT_NOEXCEPT(H{}(T{}));
+ static_assert(std::is_same_v<decltype(H{}(T{})), std::size_t>);
+
+ // Same value -> same hash, every time.
+ H h;
+ for (int v = 0; v <= 16; ++v) {
+ T t1(static_cast<T>(v));
+ T t2(static_cast<T>(v));
+ assert(h(t1) == h(t2));
+ }
+}
+
+// Distinct value bits should not collide trivially. The hash function is
+// allowed to collide, but adjacent small integers should not all hash to
+// the same value -- that would be a sign that the value was not being
+// included in the hash at all (e.g. hashing only padding).
+template <class T>
+void test_distinct_values_distinct_hashes() {
+ std::hash<T> h;
+ std::size_t hashes[16];
+ for (int i = 0; i < 16; ++i)
+ hashes[i] = h(static_cast<T>(i));
+ // At least 8 of 16 must be unique. (Any reasonable hash maps these 16 inputs
+ // to 16 distinct values; the loose threshold only tolerates exotic hashes.)
+ int unique = 0;
+ for (int i = 0; i < 16; ++i) {
+ bool seen = false;
+ for (int j = 0; j < i; ++j)
+ if (hashes[i] == hashes[j])
+ seen = true;
+ if (!seen)
+ ++unique;
+ }
+ assert(unique >= 8);
+}
+
+// The standard guarantees `a == b` implies `hash(a) == hash(b)`. For
+// _BitInt(N), two values that compare equal might be reached via different
+// expression chains. Round-tripping through a wider unsigned type and
+// truncating must not change the hash.
+template <class T>
+void test_equal_values_same_hash_via_different_paths() {
+ std::hash<T> h;
+ T direct = static_cast<T>(42);
+ T via_wider;
+ {
+ using U64 = unsigned long long;
+ U64 wide = 42;
+ via_wider = static_cast<T>(wide);
+ }
+ assert(direct == via_wider);
+ assert(h(direct) == h(via_wider));
+
+ // Bitwise-NOT of zero is the unsigned all-ones value. For the maximum
+ // representable value, two paths must agree.
+ if constexpr (!std::is_signed_v<T>) {
+ T max1 = static_cast<T>(~T(0));
+ T max2 = T(0);
+ for (int i = 0; i < (int)(sizeof(T) * 8); ++i)
+ max2 = static_cast<T>((max2 << 1) | T(1));
+ assert(h(max1) == h(max2));
+ }
+}
+
+template <class T>
+void test_all() {
+ test_basic<T>();
+ test_distinct_values_distinct_hashes<T>();
+ test_equal_values_same_hash_via_different_paths<T>();
+}
+
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+int main(int, char**) {
+#if TEST_HAS_EXTENSION(bit_int)
+ // Byte-aligned widths.
+ test_all<unsigned _BitInt(8)>();
+ test_all<signed _BitInt(8)>();
+ test_all<unsigned _BitInt(32)>();
+ test_all<signed _BitInt(32)>();
+ test_all<unsigned _BitInt(64)>();
+ test_all<signed _BitInt(64)>();
+
+ // Non-byte-aligned widths -- exercise types whose sizeof * CHAR_BIT
+ // exceeds the value-bit count.
+ test_all<unsigned _BitInt(7)>();
+ test_all<signed _BitInt(7)>();
+ test_all<unsigned _BitInt(13)>();
+ test_all<signed _BitInt(13)>();
+ test_all<unsigned _BitInt(37)>();
+ test_all<signed _BitInt(37)>();
+
+# if __BITINT_MAXWIDTH__ >= 128
+ test_all<unsigned _BitInt(77)>();
+ test_all<signed _BitInt(77)>();
+ test_all<unsigned _BitInt(128)>();
+ test_all<signed _BitInt(128)>();
+# endif
+# if __BITINT_MAXWIDTH__ >= 256
+ test_all<unsigned _BitInt(129)>();
+ test_all<signed _BitInt(129)>();
+ test_all<unsigned _BitInt(255)>();
+ test_all<signed _BitInt(255)>();
+ test_all<unsigned _BitInt(256)>();
+ test_all<signed _BitInt(256)>();
+# endif
+ // Widths that exercise the new primary template (sizeof > 4 * sizeof(size_t)).
+# if __BITINT_MAXWIDTH__ >= 257
+ test_all<unsigned _BitInt(257)>();
+ test_all<signed _BitInt(257)>();
+# endif
+# if __BITINT_MAXWIDTH__ >= 1024
+ test_all<unsigned _BitInt(1024)>();
+ test_all<signed _BitInt(1024)>();
+# endif
+# if __BITINT_MAXWIDTH__ >= 4096
+ test_all<unsigned _BitInt(4096)>();
+ test_all<signed _BitInt(4096)>();
+# endif
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+ return 0;
+}
>From 85de19800ed3ad0ebd121dc90ec61107349c7473 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Fri, 5 Jun 2026 15:42:46 +0200
Subject: [PATCH 3/3] [libc++] Strengthen std::hash _BitInt(N) padding-bit
equivalence test
The standard requires `a == b => hash(a) == hash(b)` for any T that
std::hash supports. Whenever sizeof(_BitInt(N)) * CHAR_BIT exceeds N
(always when N is not a multiple of CHAR_BIT, and also when storage is
rounded up past N), storage carries padding bits whose value is
unspecified; two equal values may have different storage byte patterns.
The existing test_equal_values_same_hash_via_different_paths
constructs equal values via two cast chains, but both chains route
through the same _BitInt ABI normalization, so they produce
byte-identical storage by construction and never probe the actual
invariant.
Add test_padding_bits_dont_break_equivalence which memcpy-copies a
value, flips every padding-bit position in the byte representation,
and asserts both `a == b` and `hash(a) == hash(b)`. This directly
probes the spec invariant against the adversarial case.
Document the by-value parameter dependency in `__scalar_hash`:
re-binding to a reference parameter, or a future ABI revision that
left padding unspecified across calls, would silently break the
invariant.
Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
libcxx/include/__functional/hash.h | 11 ++++++-
.../unord.hash/integral.bitint.pass.cpp | 29 +++++++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index ae7e0ddf82ed5..268cd7a7011df 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -259,7 +259,16 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __si
#endif
// Primary template covers any width not handled by the explicit
-// specializations below (in particular _BitInt(N) with sizeof > 4 * sizeof(size_t)).
+// specializations below, in particular _BitInt(N) with sizeof > 4 * sizeof(size_t).
+//
+// _BitInt(N) has padding bits in storage whenever sizeof(_Tp) * CHAR_BIT > N
+// (not only when N is not a multiple of CHAR_BIT). The by-value parameter `__v`
+// normalizes those bits via Clang's _BitInt ABI (zero-extend unsigned,
+// sign-extend signed), so the bytes hashed below are deterministic per value,
+// not per source storage. A future ABI change leaving padding unspecified
+// across calls, or a switch to a reference parameter that skips that
+// normalization, would break `a == b => hash(a) == hash(b)`. See
+// test_padding_bits_dont_break_equivalence in integral.bitint.pass.cpp.
template <class _Tp, size_t = sizeof(_Tp) / sizeof(size_t)>
struct __scalar_hash : public __unary_function<_Tp, size_t> {
_LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT {
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
index e2f9d40e0e29f..afa67dafd0b94 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
@@ -14,7 +14,9 @@
#include <cassert>
#include <cstddef>
+#include <cstring>
#include <functional>
+#include <limits>
#include <type_traits>
#include "test_macros.h"
@@ -88,11 +90,38 @@ void test_equal_values_same_hash_via_different_paths() {
}
}
+// Inject different bits at the padding-bit storage positions of two
+// otherwise-equal values, then confirm `a == b` and `hash(a) == hash(b)`.
+// Probes the spec invariant `a == b => hash(a) == hash(b)` directly,
+// rather than relying on the construction path to produce identical
+// byte patterns. Only applies when sizeof(T) * CHAR_BIT > digits +
+// is_signed, i.e. when storage carries padding bits.
+template <class T>
+void test_padding_bits_dont_break_equivalence() {
+  constexpr int value_bits = std::numeric_limits<T>::digits + std::is_signed_v<T>;
+  constexpr int storage_bits = static_cast<int>(sizeof(T)) * 8;
+  if constexpr (storage_bits > value_bits) {
+    std::hash<T> h;
+    T clean(static_cast<T>(5));
+    T dirty;
+    std::memcpy(&dirty, &clean, sizeof(T));
+    // Flip every padding bit: storage bit i is in byte i / 8 on little-endian targets and in
+    // the mirrored byte on big-endian ones (NOTE(review): assumes whole-object byte order).
+    constexpr bool big_endian = __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__;
+    auto* bytes = reinterpret_cast<unsigned char*>(&dirty);
+    for (int i = value_bits; i < storage_bits; ++i)
+      bytes[big_endian ? storage_bits / 8 - 1 - i / 8 : i / 8] ^= static_cast<unsigned char>(1u << (i % 8));
+    assert(clean == dirty);
+    assert(h(clean) == h(dirty));
+  }
+}
+
template <class T>
void test_all() {
test_basic<T>();
test_distinct_values_distinct_hashes<T>();
test_equal_values_same_hash_via_different_paths<T>();
+ test_padding_bits_dont_break_equivalence<T>();
}
#endif // TEST_HAS_EXTENSION(bit_int)
More information about the libcxx-commits
mailing list