[libc-commits] [libc] [libc] Make cpp::byte alias-safe (PR #194171)
Mikhail R. Gadelha via libc-commits
libc-commits at lists.llvm.org
Fri May 1 07:52:39 PDT 2026
https://github.com/mikhailramalho updated https://github.com/llvm/llvm-project/pull/194171
>From 96039d7e19529647ba115e1db73239a2321f1595 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Fri, 1 May 2026 11:21:10 -0300
Subject: [PATCH 1/2] [libc] Use unsigned char* for raw-memory aliasing in
qsort and memory utils
Clang's TBAA recognizes std::byte by name and namespace:
Type::isStdByteType() (clang/lib/AST/Type.cpp) checks for an enum named
"byte" in the std namespace, and that's the sole predicate granting
char-aliasing in CodeGenTBAA.cpp. LIBC_NAMESPACE::cpp::byte lives in
libc's cpp namespace, so it gets its own TBAA node distinct from char
even though it has the same shape as std::byte.
On rv64 this lets the optimizer reorder int loads past the byte-wise
swap in ArrayGenericSize::swap, miscompiling HeapSort
(UnsortedThreeElementArray{1,2,3}, UnsortedTwoElementArray1).
Switch the raw-aliasing call sites to unsigned char*, which is
unambiguously a character type for TBAA. cpp::byte itself is left
unchanged for callers that want the std::byte-shaped strong typing.
---
libc/src/stdlib/qsort_data.h | 29 ++++++++++---------
.../memory_utils/generic/byte_per_byte.h | 2 +-
libc/src/string/memory_utils/utils.h | 6 ++--
.../test/src/string/memory_utils/op_tests.cpp | 6 ++--
4 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index 4f9774088fbd3..d1e2ce8c00a1f 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -18,17 +18,17 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
class ArrayGenericSize {
- cpp::byte *array_base;
+ unsigned char *array_base;
size_t array_len;
size_t elem_size;
- LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+ LIBC_INLINE unsigned char *get_internal(size_t i) const {
return array_base + (i * elem_size);
}
public:
LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
- : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
+ : array_base(reinterpret_cast<unsigned char *>(a)), array_len(s),
elem_size(e) {}
static constexpr bool has_fixed_size() { return false; }
@@ -46,13 +46,14 @@ class ArrayGenericSize {
using block_t = uint32_t;
constexpr size_t BLOCK_SIZE = sizeof(block_t);
- alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
+ alignas(block_t) unsigned char tmp_block[BLOCK_SIZE];
- cpp::byte *elem_i = get_internal(i);
- cpp::byte *elem_j = get_internal(j);
+ unsigned char *elem_i = get_internal(i);
+ unsigned char *elem_j = get_internal(j);
const size_t elem_size_rem = elem_size % BLOCK_SIZE;
- const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
+ const unsigned char *elem_i_block_end =
+ elem_i + (elem_size - elem_size_rem);
while (elem_i != elem_i_block_end) {
inline_memcpy(tmp_block, elem_i, BLOCK_SIZE);
@@ -64,7 +65,7 @@ class ArrayGenericSize {
}
for (size_t n = 0; n < elem_size_rem; ++n) {
- cpp::byte tmp = elem_i[n];
+ unsigned char tmp = elem_i[n];
elem_i[n] = elem_j[n];
elem_j[n] = tmp;
}
@@ -89,16 +90,16 @@ class ArrayGenericSize {
// compile-time what the size of the element is, allows for much more
// efficient swapping and for cheaper offset calculations.
template <size_t ELEM_SIZE> class ArrayFixedSize {
- cpp::byte *array_base;
+ unsigned char *array_base;
size_t array_len;
- LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+ LIBC_INLINE unsigned char *get_internal(size_t i) const {
return array_base + (i * ELEM_SIZE);
}
public:
LIBC_INLINE ArrayFixedSize(void *a, size_t s)
- : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
+ : array_base(reinterpret_cast<unsigned char *>(a)), array_len(s) {}
// Beware this function is used a heuristic for cheap to swap types, so
// instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
@@ -108,10 +109,10 @@ template <size_t ELEM_SIZE> class ArrayFixedSize {
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
LIBC_INLINE void swap(size_t i, size_t j) const {
- alignas(32) cpp::byte tmp[ELEM_SIZE];
+ alignas(32) unsigned char tmp[ELEM_SIZE];
- cpp::byte *elem_i = get_internal(i);
- cpp::byte *elem_j = get_internal(j);
+ unsigned char *elem_i = get_internal(i);
+ unsigned char *elem_j = get_internal(j);
inline_memcpy(tmp, elem_i, ELEM_SIZE);
__builtin_memmove(elem_i, elem_j, ELEM_SIZE);
diff --git a/libc/src/string/memory_utils/generic/byte_per_byte.h b/libc/src/string/memory_utils/generic/byte_per_byte.h
index 862aeecab7f55..b8a02bc352808 100644
--- a/libc/src/string/memory_utils/generic/byte_per_byte.h
+++ b/libc/src/string/memory_utils/generic/byte_per_byte.h
@@ -49,7 +49,7 @@ inline_memset_byte_per_byte(Ptr dst, uint8_t value, size_t count,
size_t offset = 0) {
LIBC_LOOP_NOUNROLL
for (; offset < count; ++offset)
- dst[offset] = static_cast<cpp::byte>(value);
+ dst[offset] = static_cast<unsigned char>(value);
}
[[maybe_unused]] LIBC_INLINE BcmpReturnType
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index 6f6882b49223b..3b13c63e45b25 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -105,8 +105,10 @@ LIBC_INLINE void memcpy_inline(void *__restrict dst,
#endif
}
-using Ptr = cpp::byte *; // Pointer to raw data.
-using CPtr = const cpp::byte *; // Pointer to const raw data.
+// These helpers read and write arbitrary object representations, so they must
+// use a character type for aliasing.
+using Ptr = unsigned char *; // Pointer to raw data.
+using CPtr = const unsigned char *; // Pointer to const raw data.
// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
diff --git a/libc/test/src/string/memory_utils/op_tests.cpp b/libc/test/src/string/memory_utils/op_tests.cpp
index 06089a89d9aac..8ac9bced096b8 100644
--- a/libc/test/src/string/memory_utils/op_tests.cpp
+++ b/libc/test/src/string/memory_utils/op_tests.cpp
@@ -61,9 +61,9 @@ using MemcpyImplementations = testing::TypeList<
#endif // LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
>;
-// Convenient helper to turn a span into cpp::byte *.
-static inline cpp::byte *as_byte(cpp::span<char> span) {
- return reinterpret_cast<cpp::byte *>(span.data());
+// Convenient helper to turn a span into a raw-memory pointer.
+static inline Ptr as_byte(cpp::span<char> span) {
+ return reinterpret_cast<Ptr>(span.data());
}
// Adapt CheckMemcpy signature to op implementation signatures.
>From 32348eeab19a236833b45ba81ddbfd3cf1544d52 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Fri, 1 May 2026 11:51:55 -0300
Subject: [PATCH 2/2] [libc] Revert memory_utils raw-pointer change, keep only
qsort_data fix
Drop the unsigned char* switch in memory_utils/utils.h (Ptr/CPtr),
byte_per_byte.h, and the op_tests.cpp helper. The qsort_data.h fix is
the only one tied to an observed test failure
(HeapSort.UnsortedThreeElementArray{1,2,3} on rv64); the memory_utils
change shares the same TBAA argument but no observed miscompile, so
keep it out of this PR and discuss separately.
---
libc/src/string/memory_utils/generic/byte_per_byte.h | 2 +-
libc/src/string/memory_utils/utils.h | 6 ++----
libc/test/src/string/memory_utils/op_tests.cpp | 6 +++---
3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/libc/src/string/memory_utils/generic/byte_per_byte.h b/libc/src/string/memory_utils/generic/byte_per_byte.h
index b8a02bc352808..862aeecab7f55 100644
--- a/libc/src/string/memory_utils/generic/byte_per_byte.h
+++ b/libc/src/string/memory_utils/generic/byte_per_byte.h
@@ -49,7 +49,7 @@ inline_memset_byte_per_byte(Ptr dst, uint8_t value, size_t count,
size_t offset = 0) {
LIBC_LOOP_NOUNROLL
for (; offset < count; ++offset)
- dst[offset] = static_cast<unsigned char>(value);
+ dst[offset] = static_cast<cpp::byte>(value);
}
[[maybe_unused]] LIBC_INLINE BcmpReturnType
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index 3b13c63e45b25..6f6882b49223b 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -105,10 +105,8 @@ LIBC_INLINE void memcpy_inline(void *__restrict dst,
#endif
}
-// These helpers read and write arbitrary object representations, so they must
-// use a character type for aliasing.
-using Ptr = unsigned char *; // Pointer to raw data.
-using CPtr = const unsigned char *; // Pointer to const raw data.
+using Ptr = cpp::byte *; // Pointer to raw data.
+using CPtr = const cpp::byte *; // Pointer to const raw data.
// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
diff --git a/libc/test/src/string/memory_utils/op_tests.cpp b/libc/test/src/string/memory_utils/op_tests.cpp
index 8ac9bced096b8..06089a89d9aac 100644
--- a/libc/test/src/string/memory_utils/op_tests.cpp
+++ b/libc/test/src/string/memory_utils/op_tests.cpp
@@ -61,9 +61,9 @@ using MemcpyImplementations = testing::TypeList<
#endif // LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
>;
-// Convenient helper to turn a span into a raw-memory pointer.
-static inline Ptr as_byte(cpp::span<char> span) {
- return reinterpret_cast<Ptr>(span.data());
+// Convenient helper to turn a span into cpp::byte *.
+static inline cpp::byte *as_byte(cpp::span<char> span) {
+ return reinterpret_cast<cpp::byte *>(span.data());
}
// Adapt CheckMemcpy signature to op implementation signatures.
More information about the libc-commits
mailing list