[libc] [llvm] [libc] Unify and extend no_sanitize attributes for strlen. (PR #161316)

Alexey Samsonov via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 30 10:26:51 PDT 2025


https://github.com/vonosmas updated https://github.com/llvm/llvm-project/pull/161316

>From f27cf74192b3c1d83329b2d4008e23595c59c5a1 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Mon, 29 Sep 2025 22:21:02 -0700
Subject: [PATCH 1/3] [libc] Unify and extend no_sanitize attributes for
 strlen.

Fast strlen implementations (naive wide-reads, SIMD-based, and
x86_64/aarch64-optimized versions) may all perform reads that are
technically out of bounds, which leads to reports under ASan, HWASan
(on ARM machines), and TSan (which is also capable of detecting heap
out-of-bounds reads). So, we need to explicitly disable instrumentation
in all three cases.

Unfortunately, Clang did not support the `[[gnu::no_sanitize]]` syntax
until recently, and since we support both GCC and Clang, we have to
fall back to the `__attribute__` syntax.
---
 libc/src/string/memory_utils/aarch64/inline_strlen.h | 3 ++-
 libc/src/string/memory_utils/generic/inline_strlen.h | 3 ++-
 libc/src/string/memory_utils/x86_64/inline_strlen.h  | 6 ++++--
 libc/src/string/string_utils.h                       | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 36fd1aa636b54..9e5320afe987f 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,7 +17,8 @@
 namespace LIBC_NAMESPACE_DECL {
 
 namespace neon {
-[[gnu::no_sanitize_address]] [[maybe_unused]] LIBC_INLINE static size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+[[maybe_unused]] LIBC_INLINE static size_t
 string_length(const char *src) {
   using Vector __attribute__((may_alias)) = uint8x8_t;
 
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index d7435afb03719..0c13209d106d4 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -24,7 +24,8 @@ LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
   return cpp::bit_cast<cpp::simd_mask<char>>(r);
 }
 
-[[clang::no_sanitize("address")]] LIBC_INLINE size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE size_t
 string_length(const char *src) {
   constexpr cpp::simd<char> null_byte = cpp::splat('\0');
 
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 739f8c1aaddbc..047f10d8b2bad 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -18,12 +18,14 @@ namespace LIBC_NAMESPACE_DECL {
 namespace string_length_internal {
 // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
 template <typename Vector, typename Mask>
-[[gnu::no_sanitize_address]] LIBC_INLINE static Mask
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static Mask
 compare_and_mask(const Vector *block_ptr);
 
 template <typename Vector, typename Mask,
           decltype(compare_and_mask<Vector, Mask>)>
-[[gnu::no_sanitize_address]] LIBC_INLINE static size_t
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static size_t
 string_length_vector(const char *src) {
   uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
 
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 9d636d02f4756..6ee94c244034b 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -119,7 +119,8 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
 }
 
 template <typename Word>
-[[gnu::no_sanitize_address]] LIBC_INLINE void *
+__attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE void *
 find_first_character_wide_read(const unsigned char *src, unsigned char ch,
                                size_t n) {
   const unsigned char *char_ptr = src;

>From f2e0c24f01f93569e00123850ceb8a514228586c Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Mon, 29 Sep 2025 22:41:22 -0700
Subject: [PATCH 2/3] move attribute list to a different place

---
 libc/src/string/memory_utils/aarch64/inline_strlen.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 9e5320afe987f..49b901f2014a2 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,8 +17,8 @@
 namespace LIBC_NAMESPACE_DECL {
 
 namespace neon {
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-[[maybe_unused]] LIBC_INLINE static size_t
+[[maybe_unused]] __attribute__((no_sanitize("address", "hwaddress", "thread")))
+LIBC_INLINE static size_t
 string_length(const char *src) {
   using Vector __attribute__((may_alias)) = uint8x8_t;
 

>From 54adc87a3f8373a3ee7def359092708110a8fade Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Tue, 30 Sep 2025 10:26:14 -0700
Subject: [PATCH 3/3] Move no_sanitize attribute to macro

---
 libc/src/__support/macros/attributes.h               | 10 ++++++++++
 libc/src/string/CMakeLists.txt                       |  1 +
 libc/src/string/memory_utils/aarch64/inline_strlen.h |  3 +--
 libc/src/string/memory_utils/generic/inline_strlen.h |  4 +---
 libc/src/string/memory_utils/x86_64/inline_strlen.h  |  6 ++----
 libc/src/string/string_utils.h                       |  4 ++--
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel    |  1 +
 7 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h
index 145aa3b65057c..7dd28fca97db7 100644
--- a/libc/src/__support/macros/attributes.h
+++ b/libc/src/__support/macros/attributes.h
@@ -81,4 +81,14 @@ LIBC_THREAD_MODE_EXTERNAL.
 #define LIBC_HAS_VECTOR_TYPE 0
 #endif
 
+#if __has_attribute(no_sanitize)
+// Disable regular and hardware-supported ASan for functions that may
+// intentionally make out-of-bounds accesses. Disable TSan as well, as it
+// also detects out-of-bounds accesses to heap memory.
+#define LIBC_NOSANITIZE_OOB_ACCESS                                             \
+  __attribute__((no_sanitize("address", "hwaddress", "thread")))
+#else
+#define LIBC_NOSANITIZE_OOB_ACCESS
+#endif
+
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index b8cdb2a7d3538..83c956429be24 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -22,6 +22,7 @@ add_header_library(
     libc.src.__support.CPP.type_traits
     libc.src.__support.CPP.simd
     libc.src.__support.common
+    libc.src.__support.macros.attributes
     libc.src.string.memory_utils.inline_memcpy
   ${string_config_options}
 )
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 49b901f2014a2..fa6b7e36eadfe 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,8 +17,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 namespace neon {
-[[maybe_unused]] __attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static size_t
+[[maybe_unused]] LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static size_t
 string_length(const char *src) {
   using Vector __attribute__((may_alias)) = uint8x8_t;
 
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 0c13209d106d4..f14c81675611c 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -24,9 +24,7 @@ LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
   return cpp::bit_cast<cpp::simd_mask<char>>(r);
 }
 
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE size_t
-string_length(const char *src) {
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) {
   constexpr cpp::simd<char> null_byte = cpp::splat('\0');
 
   size_t alignment = alignof(cpp::simd<char>);
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 047f10d8b2bad..c388724cf7fe0 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL {
 namespace string_length_internal {
 // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
 template <typename Vector, typename Mask>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static Mask
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static Mask
 compare_and_mask(const Vector *block_ptr);
 
 template <typename Vector, typename Mask,
           decltype(compare_and_mask<Vector, Mask>)>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE static size_t
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE static size_t
 string_length_vector(const char *src) {
   uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
 
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 6ee94c244034b..5cb96533d0398 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -19,6 +19,7 @@
 #include "hdr/types/size_t.h"
 #include "src/__support/CPP/bitset.h"
 #include "src/__support/CPP/type_traits.h" // cpp::is_same_v
+#include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 #include "src/string/memory_utils/inline_memcpy.h"
@@ -119,8 +120,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
 }
 
 template <typename Word>
-__attribute__((no_sanitize("address", "hwaddress", "thread")))
-LIBC_INLINE void *
+LIBC_NOSANITIZE_OOB_ACCESS LIBC_INLINE void *
 find_first_character_wide_read(const unsigned char *src, unsigned char ch,
                                size_t n) {
   const unsigned char *char_ptr = src;
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 8d9e80393bf20..287750c30a2ac 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5320,6 +5320,7 @@ libc_support_library(
         ":__support_common",
         ":__support_cpp_bitset",
         ":__support_cpp_type_traits",
+        ":__support_macros_attributes",
         ":__support_macros_optimization",
         ":hdr_limits_macros",
         ":llvm_libc_types_size_t",



More information about the llvm-commits mailing list