[compiler-rt] [compiler-rt][asan] Reland: wcscpy/wcsncpy interceptors and stabilize wchar tests on Darwin/Android (PR #162028)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 5 13:25:11 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Yixuan Cao (Cao-Wuhui)
<details>
<summary>Changes</summary>
### Summary
Reland the wcscpy/wcsncpy interceptors and stabilize the wchar tests on Darwin/Android. This is a functional reland covering both the runtime and the tests.
### Context
Reland of #160493 and #161624, which were previously reverted by #162021 and #162001 to get the build bots green again.
### Motivation
- Restore wchar interceptors (wcscpy/wcsncpy), broaden ASan coverage, and improve Windows parity with narrow-string checks.
- Make tests robust across Darwin/Android to keep bots green.
### Runtime (wcscpy/wcsncpy)
- Add overlap checks; mark read/write ranges in bytes.
- Use MaybeRealWcsnlen when available to bound reads.
- Register Windows static runtime thunk where applicable.
### Tests (wcscpy/wcsncpy/wcscat/wcsncat)
- Android: keep `%env_asan_opts=log_to_stderr=1` so the ASan header is on stderr.
- Darwin: tolerate reordering by putting all four key lines in one DAG group:
```cpp
// CHECK-DAG: Good so far.
// CHECK-DAG: ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:...]] at pc {{...}} bp {{...}} sp {{...}}
// CHECK-DAG: WRITE of size {{[0-9]+}} at [[ADDR]] thread T0
// CHECK-DAG: #0 {{0x[0-9a-f]+}} in <func>
```
### Risk
- Functional reland (runtime + tests), intended to restore functionality and maintain stability across platforms.
---
Full diff: https://github.com/llvm/llvm-project/pull/162028.diff
8 Files Affected:
- (modified) compiler-rt/lib/asan/asan_interceptors.cpp (+43-3)
- (modified) compiler-rt/lib/asan/asan_interceptors.h (+1)
- (modified) compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp (+4)
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h (+1-1)
- (added) compiler-rt/test/asan/TestCases/wcscat.cpp (+26)
- (added) compiler-rt/test/asan/TestCases/wcscpy.cpp (+23)
- (added) compiler-rt/test/asan/TestCases/wcsncat.cpp (+27)
- (added) compiler-rt/test/asan/TestCases/wcsncpy.cpp (+24)
``````````diff
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 7c9a08b9083a2..0f613f0fdc30b 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -58,13 +58,20 @@ namespace __asan {
static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
#if SANITIZER_INTERCEPT_STRNLEN
- if (REAL(strnlen)) {
+ if (REAL(strnlen))
return REAL(strnlen)(s, maxlen);
- }
-#endif
+# endif
return internal_strnlen(s, maxlen);
}
+static inline uptr MaybeRealWcsnlen(const wchar_t* s, uptr maxlen) {
+# if SANITIZER_INTERCEPT_WCSNLEN
+ if (REAL(wcsnlen))
+ return REAL(wcsnlen)(s, maxlen);
+# endif
+ return internal_wcsnlen(s, maxlen);
+}
+
void SetThreadName(const char *name) {
AsanThread *t = GetCurrentThread();
if (t)
@@ -570,6 +577,20 @@ INTERCEPTOR(char *, strcpy, char *to, const char *from) {
return REAL(strcpy)(to, from);
}
+INTERCEPTOR(wchar_t*, wcscpy, wchar_t* to, const wchar_t* from) {
+ void* ctx;
+ ASAN_INTERCEPTOR_ENTER(ctx, wcscpy);
+ if (!TryAsanInitFromRtl())
+ return REAL(wcscpy)(to, from);
+ if (flags()->replace_str) {
+ uptr size = (internal_wcslen(from) + 1) * sizeof(wchar_t);
+ CHECK_RANGES_OVERLAP("wcscpy", to, size, from, size);
+ ASAN_READ_RANGE(ctx, from, size);
+ ASAN_WRITE_RANGE(ctx, to, size);
+ }
+ return REAL(wcscpy)(to, from);
+}
+
// Windows doesn't always define the strdup identifier,
// and when it does it's a macro defined to either _strdup
// or _strdup_dbg, _strdup_dbg ends up calling _strdup, so
@@ -633,6 +654,20 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, usize size) {
return REAL(strncpy)(to, from, size);
}
+INTERCEPTOR(wchar_t*, wcsncpy, wchar_t* to, const wchar_t* from, uptr size) {
+ void* ctx;
+ ASAN_INTERCEPTOR_ENTER(ctx, wcsncpy);
+ AsanInitFromRtl();
+ if (flags()->replace_str) {
+ uptr from_size =
+ Min(size, MaybeRealWcsnlen(from, size) + 1) * sizeof(wchar_t);
+ CHECK_RANGES_OVERLAP("wcsncpy", to, from_size, from, from_size);
+ ASAN_READ_RANGE(ctx, from, from_size);
+ ASAN_WRITE_RANGE(ctx, to, size * sizeof(wchar_t));
+ }
+ return REAL(wcsncpy)(to, from, size);
+}
+
template <typename Fn>
static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
char **endptr, int base)
@@ -809,6 +844,11 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(strncat);
ASAN_INTERCEPT_FUNC(strncpy);
ASAN_INTERCEPT_FUNC(strdup);
+
+ // Intercept wcs* functions.
+ ASAN_INTERCEPT_FUNC(wcscpy);
+ ASAN_INTERCEPT_FUNC(wcsncpy);
+
# if ASAN_INTERCEPT___STRDUP
ASAN_INTERCEPT_FUNC(__strdup);
#endif
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index 3e2386eaf8092..2d551cfafd1f5 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -129,6 +129,7 @@ DECLARE_REAL(char*, strchr, const char *str, int c)
DECLARE_REAL(SIZE_T, strlen, const char *s)
DECLARE_REAL(char*, strncpy, char *to, const char *from, SIZE_T size)
DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen)
+DECLARE_REAL(SIZE_T, wcsnlen, const wchar_t* s, SIZE_T maxlen)
DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
# if !SANITIZER_APPLE
diff --git a/compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp b/compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp
index 4a69b66574039..46e0e90738f24 100644
--- a/compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp
+++ b/compiler-rt/lib/asan/asan_win_static_runtime_thunk.cpp
@@ -63,6 +63,10 @@ INTERCEPT_LIBRARY_FUNCTION_ASAN(strpbrk);
INTERCEPT_LIBRARY_FUNCTION_ASAN(strspn);
INTERCEPT_LIBRARY_FUNCTION_ASAN(strstr);
INTERCEPT_LIBRARY_FUNCTION_ASAN(strtok);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcscat);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcscpy);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcsncat);
+INTERCEPT_LIBRARY_FUNCTION_ASAN(wcsncpy);
INTERCEPT_LIBRARY_FUNCTION_ASAN(wcslen);
INTERCEPT_LIBRARY_FUNCTION_ASAN(wcsnlen);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 29987decdff45..88ecd7e16306a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -551,7 +551,7 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment,
#define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT_WCSLEN 1
-#define SANITIZER_INTERCEPT_WCSCAT SI_POSIX
+#define SANITIZER_INTERCEPT_WCSCAT (SI_POSIX || SI_WINDOWS)
#define SANITIZER_INTERCEPT_WCSDUP SI_POSIX
#define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA)
#define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID
diff --git a/compiler-rt/test/asan/TestCases/wcscat.cpp b/compiler-rt/test/asan/TestCases/wcscat.cpp
new file mode 100644
index 0000000000000..833dee9655264
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscat.cpp
@@ -0,0 +1,26 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O1 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O2 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O3 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ const wchar_t *start = L"X means ";
+ const wchar_t *append = L"dog";
+ wchar_t goodDst[12];
+ wcscpy(goodDst, start);
+ wcscat(goodDst, append);
+
+ wchar_t badDst[9];
+ wcscpy(badDst, start);
+ fprintf(stderr, "Good so far.\n");
+ // CHECK-DAG: Good so far.
+ fflush(stderr);
+ wcscat(badDst, append); // Boom!
+ // CHECK-DAG: ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK-DAG: WRITE of size {{[0-9]+}} at [[ADDR]] thread T0
+ // CHECK-DAG: #0 {{0x[0-9a-f]+}} in wcscat
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcscpy.cpp b/compiler-rt/test/asan/TestCases/wcscpy.cpp
new file mode 100644
index 0000000000000..c7205e486e3ff
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscpy.cpp
@@ -0,0 +1,23 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O1 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O2 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O3 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ const wchar_t *src = L"X means dog";
+ wchar_t goodDst[12];
+ wcscpy(goodDst, src);
+
+ wchar_t badDst[7];
+ fprintf(stderr, "Good so far.\n");
+ // CHECK-DAG: Good so far.
+ fflush(stderr);
+ wcscpy(badDst, src); // Boom!
+ // CHECK-DAG: ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK-DAG: WRITE of size {{[0-9]+}} at [[ADDR]] thread T0
+ // CHECK-DAG: #0 {{0x[0-9a-f]+}} in wcscpy
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncat.cpp b/compiler-rt/test/asan/TestCases/wcsncat.cpp
new file mode 100644
index 0000000000000..8fe1e510a26cf
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncat.cpp
@@ -0,0 +1,27 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O1 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O2 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O3 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ const wchar_t *start = L"X means ";
+ const wchar_t *append = L"dog";
+ wchar_t goodDst[15];
+ wcscpy(goodDst, start);
+ wcsncat(goodDst, append, 5);
+
+ wchar_t badDst[11];
+ wcscpy(badDst, start);
+ wcsncat(badDst, append, 1);
+ fprintf(stderr, "Good so far.\n");
+ // CHECK-DAG: Good so far.
+ fflush(stderr);
+ wcsncat(badDst, append, 3); // Boom!
+ // CHECK-DAG: ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK-DAG: WRITE of size {{[0-9]+}} at [[ADDR]] thread T0
+ // CHECK-DAG: #0 {{0x[0-9a-f]+}} in wcsncat
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncpy.cpp b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
new file mode 100644
index 0000000000000..7db3351cdb6a6
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O1 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O2 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clangxx_asan -O3 %s -o %t && not %env_asan_opts=log_to_stderr=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+ const wchar_t *src = L"X means dog";
+ wchar_t goodDst[12];
+ wcsncpy(goodDst, src, 12);
+
+ wchar_t badDst[7];
+ wcsncpy(badDst, src, 7); // This should still work.
+ fprintf(stderr, "Good so far.\n");
+ // CHECK-DAG: Good so far.
+ fflush(stderr);
+ wcsncpy(badDst, src, 15); // Boom!
+ // CHECK-DAG: ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+ // CHECK-DAG: WRITE of size {{[0-9]+}} at [[ADDR]] thread T0
+ // CHECK-DAG: #0 {{0x[0-9a-f]+}} in wcsncpy
+ printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
``````````
</details>
https://github.com/llvm/llvm-project/pull/162028
More information about the llvm-commits
mailing list