[libc] [llvm] [libc] Unify and extend no_sanitize attributes for strlen. (PR #161316)
Alexey Samsonov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 10:26:51 PDT 2025
https://github.com/vonosmas updated https://github.com/llvm/llvm-project/pull/161316
>From f27cf74192b3c1d83329b2d4008e23595c59c5a1 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Mon, 29 Sep 2025 22:21:02 -0700
Subject: [PATCH 1/3] [libc] Unify and extend no_sanitize attributes for
strlen.
Fast strlen implementations (naive wide-reads, SIMD-based, and
x86_64/aarch64-optimized versions) may all perform technically
out-of-bounds reads, which leads to reports under ASan, HWASan (on ARM
machines), and TSan (which can also detect heap out-of-bounds reads).
So, we need to explicitly disable instrumentation for all three
sanitizers.
Tragically, Clang didn't support the `[[gnu::no_sanitize]]` syntax until
recently, and since we're supporting both GCC and Clang, we have to
fall back to the `__attribute__` syntax.
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 3 ++-
libc/src/string/memory_utils/generic/inline_strlen.h | 3 ++-
libc/src/string/memory_utils/x86_64/inline_strlen.h | 6 ++++--
libc/src/string/string_utils.h | 3 ++-
4 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 36fd1aa636b54..9e5320afe987f 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,7 +17,8 @@
namespace LIBC_NAMESPACE_DECL {
namespace neon {
-[[gnu::no_sanitize_address]] [[maybe_unused]] LIBC_INLINE static size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+[[maybe_unused]] LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index d7435afb03719..0c13209d106d4 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -24,7 +24,8 @@ LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
return cpp::bit_cast<cpp::simd_mask<char>>(r);
}
-[[clang::no_sanitize("address")]] LIBC_INLINE size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE size_t
string_length(const char *src) {
constexpr cpp::simd<char> null_byte = cpp::splat('\0');
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 739f8c1aaddbc..047f10d8b2bad 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -18,12 +18,14 @@ namespace LIBC_NAMESPACE_DECL {
namespace string_length_internal {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
-[[gnu::no_sanitize_address]] LIBC_INLINE static Mask
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static Mask
compare_and_mask(const Vector *block_ptr);
template <typename Vector, typename Mask,
decltype(compare_and_mask<Vector, Mask>)>
-[[gnu::no_sanitize_address]] LIBC_INLINE static size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static size_t
string_length_vector(const char *src) {
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 9d636d02f4756..6ee94c244034b 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -119,7 +119,8 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
}
template <typename Word>
-[[gnu::no_sanitize_address]] LIBC_INLINE void *
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE void *
find_first_character_wide_read(const unsigned char *src, unsigned char ch,
size_t n) {
const unsigned char *char_ptr = src;
>From f2e0c24f01f93569e00123850ceb8a514228586c Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Mon, 29 Sep 2025 22:41:22 -0700
Subject: [PATCH 2/3] move attribute list to a different place
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 9e5320afe987f..49b901f2014a2 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,8 +17,8 @@
namespace LIBC_NAMESPACE_DECL {
namespace neon {
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-[[maybe_unused]] LIBC_INLINE static size_t
+[[maybe_unused]] __attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
>From 54adc87a3f8373a3ee7def359092708110a8fade Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Tue, 30 Sep 2025 10:26:14 -0700
Subject: [PATCH 3/3] Move no_sanitize attribute to macro
---
libc/src/__support/macros/attributes.h | 10 ++++++++++
libc/src/string/CMakeLists.txt | 1 +
libc/src/string/memory_utils/aarch64/inline_strlen.h | 3 +--
libc/src/string/memory_utils/generic/inline_strlen.h | 4 +---
libc/src/string/memory_utils/x86_64/inline_strlen.h | 6 ++----
libc/src/string/string_utils.h | 4 ++--
utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 +
7 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h
index 145aa3b65057c..7dd28fca97db7 100644
--- a/libc/src/__support/macros/attributes.h
+++ b/libc/src/__support/macros/attributes.h
@@ -81,4 +81,14 @@ LIBC_THREAD_MODE_EXTERNAL.
#define LIBC_HAS_VECTOR_TYPE 0
#endif
+#if __has_attribute(no_sanitize)
+// Disable regular and hardware-supported ASan for functions that may
+// intentionally make out-of-bounds access. Disable TSan as well, as it detects
+// out-of-bounds accesses to heap memory.
+#define LIBC_NOSANITIZE_OOB_ACCESS \
+ __attribute__((no_sanitize("address", "hwaddress", "thread")))
+#else
+#define LIBC_NOSANITIZE_OOB_ACCESS
+#endif
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index b8cdb2a7d3538..83c956429be24 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -22,6 +22,7 @@ add_header_library(
libc.src.__support.CPP.type_traits
libc.src.__support.CPP.simd
libc.src.__support.common
+ libc.src.__support.macros.attributes
libc.src.string.memory_utils.inline_memcpy
${string_config_options}
)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 49b901f2014a2..fa6b7e36eadfe 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,8 +17,7 @@
namespace LIBC_NAMESPACE_DECL {
namespace neon {
-[[maybe_unused]] __attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static size_t
+[[maybe_unused]] LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 0c13209d106d4..f14c81675611c 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -24,9 +24,7 @@ LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
return cpp::bit_cast<cpp::simd_mask<char>>(r);
}
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE size_t
-string_length(const char *src) {
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) {
constexpr cpp::simd<char> null_byte = cpp::splat('\0');
size_t alignment = alignof(cpp::simd<char>);
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 047f10d8b2bad..c388724cf7fe0 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL {
namespace string_length_internal {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static Mask
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static Mask
compare_and_mask(const Vector *block_ptr);
template <typename Vector, typename Mask,
decltype(compare_and_mask<Vector, Mask>)>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static size_t
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length_vector(const char *src) {
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 6ee94c244034b..5cb96533d0398 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -19,6 +19,7 @@
#include "hdr/types/size_t.h"
#include "src/__support/CPP/bitset.h"
#include "src/__support/CPP/type_traits.h" // cpp::is_same_v
+#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_memcpy.h"
@@ -119,8 +120,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
}
template <typename Word>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE void *
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE void *
find_first_character_wide_read(const unsigned char *src, unsigned char ch,
size_t n) {
const unsigned char *char_ptr = src;
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 8d9e80393bf20..287750c30a2ac 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5320,6 +5320,7 @@ libc_support_library(
":__support_common",
":__support_cpp_bitset",
":__support_cpp_type_traits",
+ ":__support_macros_attributes",
":__support_macros_optimization",
":hdr_limits_macros",
":llvm_libc_types_size_t",
More information about the llvm-commits
mailing list