[libcxx-commits] [libcxx] [libc++] Add std::hash support for wide _BitInt types (PR #193194)

Xavier Roche via libcxx-commits libcxx-commits at lists.llvm.org
Tue Apr 21 04:20:57 PDT 2026


https://github.com/xroche created https://github.com/llvm/llvm-project/pull/193194

The existing `__scalar_hash` only had explicit specializations for `sizeof` up to `4 * sizeof(size_t)`, so `std::hash<unsigned _BitInt(N)>` failed to compile for `N > 256` (on 64-bit). Make the primary template a defined fallback that hashes `sizeof(T)` raw bytes via `std::__hash_memory`; the existing 0..4-word specializations still take precedence, so no ABI change for narrower types.

Tests cover signed and unsigned `_BitInt` widths from 7 up to 4096 bits (the wider tiers are compiled only when `__BITINT_MAXWIDTH__` permits them), including stress checks that pollute the stack before each hash call (to catch leaks of uninitialised bytes through the union trick) and that reach the same value through different conversion chains.

Note: the standard allows `_BitInt(N)` padding bits to be unspecified, so a defensive `memset(&__u, 0, sizeof(__u))` before the assignment in each `__scalar_hash` specialisation would protect against future implementations that do not normalise padding. Clang normalises today and the tests pass without it, so this PR keeps the fix minimal; happy to add the `memset` if the reviewer prefers.

This is part of the [_BitInt(N) libc++ effort](https://discourse.llvm.org/t/bitint-n-support-in-libc-investigations-possible-improvements-looking-for-guidance/90063).

Assisted-by: Claude (Anthropic)

>From 8e66489ac4dcec46da523f4f8ab7f08ec1f865b1 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:18:20 +0200
Subject: [PATCH 1/2] [libc++] Add std::hash support for wide _BitInt types

The existing __scalar_hash specializations only cover sizeof(T) up to
4 * sizeof(size_t), so std::hash<unsigned _BitInt(N)> failed to compile
for any N where sizeof exceeds that (N > 256 on 64-bit). Make the
primary template a defined fallback that hashes sizeof(T) raw bytes via
std::__hash_memory; the existing 0..4-word specializations still take
precedence and keep their current ABI.

Part of the _BitInt(N) libc++ effort:
https://discourse.llvm.org/t/bitint-n-support-in-libc-investigations-possible-improvements-looking-for-guidance/90063

Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 libcxx/include/__functional/hash.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index fa7181984f163..4e0150cde38ff 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -256,8 +256,19 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __si
 }
 #endif
 
+// Primary template covers any width not handled by the explicit
+// specializations below (in particular _BitInt(N) with sizeof > 4 * sizeof(size_t)).
 template <class _Tp, size_t = sizeof(_Tp) / sizeof(size_t)>
-struct __scalar_hash;
+struct __scalar_hash : public __unary_function<_Tp, size_t> {
+  _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT {
+    union { // local storage for a copy of __v, hashed below as raw bytes
+      _Tp __t;
+      char __bytes[sizeof(_Tp)]; // never accessed by name; same size as __t
+    } __u;
+    __u.__t = __v; // only __t is written; padding is left as the assignment produces it
+    return std::__hash_memory(std::addressof(__u), sizeof(__u)); // hash all sizeof(__u) bytes
+  }
+};
 
 template <class _Tp>
 struct __scalar_hash<_Tp, 0> : public __unary_function<_Tp, size_t> {

>From f7d226ca3fd7246dbd4bfa2026d1dc82ac972e12 Mon Sep 17 00:00:00 2001
From: Xavier Roche <xavier.roche at algolia.com>
Date: Tue, 21 Apr 2026 13:19:26 +0200
Subject: [PATCH 2/2] [libc++][test] Cover std::hash for _BitInt(N)

Test that std::hash<_BitInt(N)> exists, is noexcept, and respects the
hash-equality invariant across signed and unsigned widths from 7 up to
4096 bits (wider tiers are guarded by __BITINT_MAXWIDTH__). The widths
above 256 exercise the new primary __scalar_hash template on 64-bit
targets; the smaller widths cover types whose equal values would
silently hash differently under a future implementation that did not
normalise padding bits on assignment (Clang does, so the tests pass
today).

Stress checks include polluting the stack before each call (catches
leaks of uninitialised bytes through the union trick) and reaching the
same value through different conversion chains.

Assisted-by: Claude (Anthropic)
Co-Authored-By: Claude Opus 4.6 <noreply at anthropic.com>
---
 .../unord.hash/integral.bitint.pass.cpp       | 174 ++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp

diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
new file mode 100644
index 0000000000000..91532646d0c54
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.bitint.pass.cpp
@@ -0,0 +1,174 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <functional>
+
+// std::hash<_BitInt(N)>
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <functional>
+#include <type_traits>
+
+#include "test_macros.h"
+
+#if TEST_HAS_EXTENSION(bit_int)
+
+template <class T>
+void test_basic() {
+  using Hasher = std::hash<T>;
+  ASSERT_NOEXCEPT(Hasher{}(T{}));
+  static_assert(std::is_same_v<std::size_t, decltype(Hasher{}(T{}))>);
+
+  // Two independently constructed objects holding the same value must hash alike.
+  Hasher hasher;
+  for (int value = 0; value != 17; ++value) {
+    const T lhs = static_cast<T>(value);
+    const T rhs = static_cast<T>(value);
+    assert(hasher(lhs) == hasher(rhs));
+  }
+}
+
+// Fill a local buffer with `pattern` and then hash `v`, so that a hash
+// implementation which read uninitialized stack bytes (e.g. via the union
+// that __scalar_hash uses internally) would likely vary with `pattern`.
+template <class T>
+[[gnu::noinline]] std::size_t hash_after_stack_pollution(T v, unsigned char pattern) {
+  unsigned char junk[256];
+  std::memset(junk, pattern, sizeof(junk));
+  asm volatile("" : : "r"(junk) : "memory"); // barrier: intended to keep the memset of `junk` from being elided
+  return std::hash<T>{}(v);
+}
+
+template <class T>
+void test_stack_pollution() {
+  const int values[] = {0, 1, 5, 42}; // built-in array: the range-for needs no <initializer_list>
+  for (const int value : values) {
+    const std::size_t hash00 = hash_after_stack_pollution(static_cast<T>(value), 0x00);
+    const std::size_t hashFF = hash_after_stack_pollution(static_cast<T>(value), 0xFF);
+    const std::size_t hashAA = hash_after_stack_pollution(static_cast<T>(value), 0xAA);
+    assert(hash00 == hashFF); // the hash must not depend on what was on the stack beforehand
+    assert(hashFF == hashAA);
+  }
+}
+
+// A hash is permitted to collide, so this does not demand sixteen distinct
+// results for sixteen distinct inputs. It only rejects the degenerate case in
+// which the value bits evidently do not feed the hash (e.g. only padding is
+// hashed), which would make small consecutive integers collapse together.
+template <class T>
+void test_distinct_values_distinct_hashes() {
+  constexpr int count = 16;
+  std::hash<T> hasher;
+  std::size_t results[count];
+  for (int idx = 0; idx != count; ++idx)
+    results[idx] = hasher(static_cast<T>(idx));
+  // Count first occurrences: an entry is new if no earlier entry equals it.
+  // Require at least half of the results to be distinct.
+  int distinct = 0;
+  for (int idx = 0; idx != count; ++idx) {
+    int earlier = 0;
+    while (earlier != idx && results[earlier] != results[idx])
+      ++earlier;
+    if (earlier == idx)
+      ++distinct;
+  }
+  assert(distinct >= count / 2);
+}
+
+// The standard guarantees `a == b` implies `hash(a) == hash(b)`. For
+// _BitInt(N), two values that compare equal might be reached via different
+// expression chains. Converting from `unsigned long long` (wider or narrower
+// than T, depending on N) must hash the same as converting from `int`.
+template <class T>
+void test_equal_values_same_hash_via_different_paths() {
+  std::hash<T> h;
+  const T direct = static_cast<T>(42);
+  const unsigned long long source = 42;
+  const T via_ull = static_cast<T>(source);
+  assert(direct == via_ull);
+  assert(h(direct) == h(via_ull));
+
+  // Build the unsigned all-ones (maximum) value two ways: bitwise-NOT of
+  // zero, and shifting in one bits. Shifting sizeof(T) * 8 times covers every
+  // value bit; surplus iterations are harmless because the result is
+  // converted back to T (i.e. reduced modulo 2^N) each time.
+  if constexpr (!std::is_signed_v<T>) {
+    const T max1 = static_cast<T>(~T(0));
+    T max2 = T(0);
+    for (std::size_t i = 0; i < sizeof(T) * 8; ++i)
+      max2 = static_cast<T>((max2 << 1) | T(1));
+    // The hash comparison is only meaningful if both paths yield equal values.
+    assert(max1 == max2);
+    assert(h(max1) == h(max2));
+  }
+}
+
+template <class T>
+void test_all() { // runs every check defined in this file for one type T
+  test_basic<T>();                                      // noexcept, result type, repeatability
+  test_stack_pollution<T>();                            // result independent of prior stack contents
+  test_distinct_values_distinct_hashes<T>();            // small values do not all collide
+  test_equal_values_same_hash_via_different_paths<T>(); // equal values hash equal
+}
+
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+int main(int, char**) {
+#if TEST_HAS_EXTENSION(bit_int)
+  // Widths that are a whole number of bytes.
+  test_all<unsigned _BitInt(8)>();
+  test_all<signed _BitInt(8)>();
+  test_all<unsigned _BitInt(32)>();
+  test_all<signed _BitInt(32)>();
+  test_all<unsigned _BitInt(64)>();
+  test_all<signed _BitInt(64)>();
+
+  // Widths that are not a whole number of bytes -- exercise types whose
+  // sizeof * CHAR_BIT exceeds the value-bit count (i.e. with padding bits).
+  test_all<unsigned _BitInt(7)>();
+  test_all<signed _BitInt(7)>();
+  test_all<unsigned _BitInt(13)>();
+  test_all<signed _BitInt(13)>();
+  test_all<unsigned _BitInt(37)>();
+  test_all<signed _BitInt(37)>();
+
+#  if __BITINT_MAXWIDTH__ >= 128 // wider tiers are compiled only if the target supports them
+  test_all<unsigned _BitInt(77)>();
+  test_all<signed _BitInt(77)>();
+  test_all<unsigned _BitInt(128)>();
+  test_all<signed _BitInt(128)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 256
+  test_all<unsigned _BitInt(129)>();
+  test_all<signed _BitInt(129)>();
+  test_all<unsigned _BitInt(255)>();
+  test_all<signed _BitInt(255)>();
+  test_all<unsigned _BitInt(256)>();
+  test_all<signed _BitInt(256)>();
+#  endif
+  // Widths with sizeof > 4 * sizeof(size_t) on 64-bit targets: these reach the primary __scalar_hash template.
+#  if __BITINT_MAXWIDTH__ >= 257
+  test_all<unsigned _BitInt(257)>();
+  test_all<signed _BitInt(257)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 1024
+  test_all<unsigned _BitInt(1024)>();
+  test_all<signed _BitInt(1024)>();
+#  endif
+#  if __BITINT_MAXWIDTH__ >= 4096
+  test_all<unsigned _BitInt(4096)>();
+  test_all<signed _BitInt(4096)>();
+#  endif
+#endif // TEST_HAS_EXTENSION(bit_int)
+
+  return 0; // also reached with no checks run when the bit_int extension is unavailable
+}



More information about the libcxx-commits mailing list