[libc-commits] [libc] 0d1ed93 - [libc] Tag cpp::byte with gnu::may_alias (#200462)
via libc-commits
libc-commits at lists.llvm.org
Sat May 30 08:40:03 PDT 2026
Author: Mikhail R. Gadelha
Date: 2026-05-30T12:39:58-03:00
New Revision: 0d1ed9383d1dc5133409e5088e336bc54700fbf7
URL: https://github.com/llvm/llvm-project/commit/0d1ed9383d1dc5133409e5088e336bc54700fbf7
DIFF: https://github.com/llvm/llvm-project/commit/0d1ed9383d1dc5133409e5088e336bc54700fbf7.diff
LOG: [libc] Tag cpp::byte with gnu::may_alias (#200462)
Clang's TBAA grants the [basic.lval]/11.3 char-aliasing privilege only
to the named ::std::byte type (Type::isStdByteType() requires the enum
to be declared in the std namespace). LIBC_NAMESPACE::cpp::byte lives in
libc's cpp namespace, so it gets its own TBAA node disjoint from char
even though it has the same shape as std::byte.
That mismatch lets the optimizer reorder typed loads past raw-byte
writes through cpp::byte *, miscompiling HeapSort on rv64/Release
(UnsortedThreeElementArray{1,2,3}, UnsortedTwoElementArray1 in
SortingTest.h). The same hazard is latent in every cpp::byte *-based
raw-aliasing site: memory_utils Ptr/CPtr, lsearch/lfind, block.h and
freelist_heap.h allocator metadata.
Tag the type with gnu::may_alias so accesses through cpp::byte * share
the universal char-aliasing TBAA node, fixing all of the above in one
place. This patch also reverts PR #194171, since the may_alias attribute
also fixes the problem that PR addressed.
Added:
Modified:
libc/src/__support/CPP/cstddef.h
libc/src/stdlib/qsort_data.h
Removed:
################################################################################
diff --git a/libc/src/__support/CPP/cstddef.h b/libc/src/__support/CPP/cstddef.h
index ed6c9d03362f4..559a2c2494b3b 100644
--- a/libc/src/__support/CPP/cstddef.h
+++ b/libc/src/__support/CPP/cstddef.h
@@ -16,7 +16,11 @@
namespace LIBC_NAMESPACE_DECL {
namespace cpp {
-enum class byte : unsigned char {};
+// Clang grants the [basic.lval]/11.3 char-aliasing privilege only to the named
+// `::std::byte` type (see `Type::isStdByteType()`), so `cpp::byte` would
+// otherwise get its own TBAA node. `may_alias` makes accesses through
+// `cpp::byte *` char-aliasing as callers expect.
+enum class [[gnu::may_alias]] byte : unsigned char {};
template <class IntegerType>
LIBC_INLINE constexpr enable_if_t<is_integral_v<IntegerType>, byte>
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index d1e2ce8c00a1f..4f9774088fbd3 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -18,17 +18,17 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
class ArrayGenericSize {
- unsigned char *array_base;
+ cpp::byte *array_base;
size_t array_len;
size_t elem_size;
- LIBC_INLINE unsigned char *get_internal(size_t i) const {
+ LIBC_INLINE cpp::byte *get_internal(size_t i) const {
return array_base + (i * elem_size);
}
public:
LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
- : array_base(reinterpret_cast<unsigned char *>(a)), array_len(s),
+ : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
elem_size(e) {}
static constexpr bool has_fixed_size() { return false; }
@@ -46,14 +46,13 @@ class ArrayGenericSize {
using block_t = uint32_t;
constexpr size_t BLOCK_SIZE = sizeof(block_t);
- alignas(block_t) unsigned char tmp_block[BLOCK_SIZE];
+ alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
- unsigned char *elem_i = get_internal(i);
- unsigned char *elem_j = get_internal(j);
+ cpp::byte *elem_i = get_internal(i);
+ cpp::byte *elem_j = get_internal(j);
const size_t elem_size_rem = elem_size % BLOCK_SIZE;
- const unsigned char *elem_i_block_end =
- elem_i + (elem_size - elem_size_rem);
+ const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
while (elem_i != elem_i_block_end) {
inline_memcpy(tmp_block, elem_i, BLOCK_SIZE);
@@ -65,7 +64,7 @@ class ArrayGenericSize {
}
for (size_t n = 0; n < elem_size_rem; ++n) {
- unsigned char tmp = elem_i[n];
+ cpp::byte tmp = elem_i[n];
elem_i[n] = elem_j[n];
elem_j[n] = tmp;
}
@@ -90,16 +89,16 @@ class ArrayGenericSize {
// compile-time what the size of the element is, allows for much more
// efficient swapping and for cheaper offset calculations.
template <size_t ELEM_SIZE> class ArrayFixedSize {
- unsigned char *array_base;
+ cpp::byte *array_base;
size_t array_len;
- LIBC_INLINE unsigned char *get_internal(size_t i) const {
+ LIBC_INLINE cpp::byte *get_internal(size_t i) const {
return array_base + (i * ELEM_SIZE);
}
public:
LIBC_INLINE ArrayFixedSize(void *a, size_t s)
- : array_base(reinterpret_cast<unsigned char *>(a)), array_len(s) {}
+ : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
// Beware this function is used a heuristic for cheap to swap types, so
// instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
@@ -109,10 +108,10 @@ template <size_t ELEM_SIZE> class ArrayFixedSize {
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
LIBC_INLINE void swap(size_t i, size_t j) const {
- alignas(32) unsigned char tmp[ELEM_SIZE];
+ alignas(32) cpp::byte tmp[ELEM_SIZE];
- unsigned char *elem_i = get_internal(i);
- unsigned char *elem_j = get_internal(j);
+ cpp::byte *elem_i = get_internal(i);
+ cpp::byte *elem_j = get_internal(j);
inline_memcpy(tmp, elem_i, ELEM_SIZE);
__builtin_memmove(elem_i, elem_j, ELEM_SIZE);
More information about the libc-commits
mailing list