[compiler-rt] Enable ASAN checks for wcscat/wcsncat on Windows, and wcscpy/wcsncpy on all platforms (PR #90909)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 2 15:11:57 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (branh)

<details>
<summary>Changes</summary>

Use of wchar versions of string functions is common on Windows.

We (Microsoft) are working to improve ASAN instrumentation of these functions, to bring wchar_t* support up to parity with the char* functions in string.h.

We're starting by enabling the wcscat/wcsncat interceptors on Windows, which are already enabled on POSIX systems, and adding interceptors for wcscpy and wcsncpy.

---
Full diff: https://github.com/llvm/llvm-project/pull/90909.diff


8 Files Affected:

- (modified) compiler-rt/lib/asan/asan_interceptors.cpp (+43) 
- (modified) compiler-rt/lib/asan/asan_interceptors.h (+1) 
- (modified) compiler-rt/lib/asan/asan_win_dll_thunk.cpp (+4) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h (+1-1) 
- (added) compiler-rt/test/asan/TestCases/wcscat.cpp (+51) 
- (added) compiler-rt/test/asan/TestCases/wcscpy.cpp (+50) 
- (added) compiler-rt/test/asan/TestCases/wcsncat.cpp (+52) 
- (added) compiler-rt/test/asan/TestCases/wcsncpy.cpp (+51) 


``````````diff
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 6d1360e104975f..bb2b100e089acd 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -65,6 +65,15 @@ static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
   return internal_strnlen(s, maxlen);
 }
 
+static inline uptr MaybeRealWcsnlen(const wchar_t *s, uptr maxlen) {
+#  if SANITIZER_INTERCEPT_STRNLEN  // NOTE(review): guard names strnlen, body uses wcsnlen -- confirm macro
+  if (REAL(wcsnlen)) {  // call the real wcsnlen only when its pointer is non-null
+    return REAL(wcsnlen)(s, maxlen);
+  }
+#  endif
+  return internal_wcsnlen(s, maxlen);  // otherwise use the internal implementation
+}
+
 void SetThreadName(const char *name) {
   AsanThread *t = GetCurrentThread();
   if (t)
@@ -570,6 +579,21 @@ INTERCEPTOR(char *, strcpy, char *to, const char *from) {
   return REAL(strcpy)(to, from);
 }
 
+INTERCEPTOR(wchar_t *, wcscpy, wchar_t *to, const wchar_t *from) {
+  void *ctx;
+  ASAN_INTERCEPTOR_ENTER(ctx, wcscpy);
+  if (!TryAsanInitFromRtl())  // init attempt reported failure: forward without checks
+    return REAL(wcscpy)(to, from);
+
+  if (flags()->replace_str) {  // range checks are gated on the replace_str flag
+    uptr from_size = (internal_wcslen(from) + 1) * sizeof(wchar_t);  // bytes, incl. terminator
+    CHECK_RANGES_OVERLAP("wcscpy", to, from_size, from, from_size);
+    ASAN_READ_RANGE(ctx, from, from_size);
+    ASAN_WRITE_RANGE(ctx, to, from_size);
+  }
+  return REAL(wcscpy)(to, from);
+}
+
 // Windows doesn't always define the strdup identifier,
 // and when it does it's a macro defined to either _strdup
 // or _strdup_dbg, _strdup_dbg ends up calling _strdup, so
@@ -630,6 +654,20 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
   return REAL(strncpy)(to, from, size);
 }
 
+INTERCEPTOR(wchar_t *, wcsncpy, wchar_t *to, const wchar_t *from, uptr size) {
+  void *ctx;
+  ASAN_INTERCEPTOR_ENTER(ctx, wcsncpy);  // register under this interceptor's own name
+  AsanInitFromRtl();
+  if (flags()->replace_str) {  // range checks are gated on the replace_str flag
+    uptr from_size =  // bytes read from `from`: at most `size` elements, incl. terminator
+        Min(size, MaybeRealWcsnlen(from, size) + 1) * sizeof(wchar_t);
+    CHECK_RANGES_OVERLAP("wcsncpy", to, from_size, from, from_size);
+    ASAN_READ_RANGE(ctx, from, from_size);
+    ASAN_WRITE_RANGE(ctx, to, size * sizeof(wchar_t));  // `size` counts wchar_t, range is bytes
+  }
+  return REAL(wcsncpy)(to, from, size);
+}
+
 template <typename Fn>
 static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
                                      char **endptr, int base)
@@ -781,6 +819,11 @@ void InitializeAsanInterceptors() {
   ASAN_INTERCEPT_FUNC(strncat);
   ASAN_INTERCEPT_FUNC(strncpy);
   ASAN_INTERCEPT_FUNC(strdup);
+
+  // Intercept wcs* functions.
+  ASAN_INTERCEPT_FUNC(wcscpy);
+  ASAN_INTERCEPT_FUNC(wcsncpy);
+
 #  if ASAN_INTERCEPT___STRDUP
   ASAN_INTERCEPT_FUNC(__strdup);
 #endif
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index 826b45f5ada8c0..f38fe100a611a1 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -129,6 +129,7 @@ DECLARE_REAL(char*, strchr, const char *str, int c)
 DECLARE_REAL(SIZE_T, strlen, const char *s)
 DECLARE_REAL(char*, strncpy, char *to, const char *from, uptr size)
 DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen)
+DECLARE_REAL(uptr, wcsnlen, const wchar_t *s, uptr maxlen)
 DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
 
 #  if !SANITIZER_APPLE
diff --git a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
index 35871a942a7a12..f1fb4b07400d52 100644
--- a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
+++ b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp
@@ -93,6 +93,10 @@ INTERCEPT_LIBRARY_FUNCTION(strstr);
 INTERCEPT_LIBRARY_FUNCTION(strtok);
 INTERCEPT_LIBRARY_FUNCTION(strtol);
 INTERCEPT_LIBRARY_FUNCTION(strtoll);
+INTERCEPT_LIBRARY_FUNCTION(wcscat);
+INTERCEPT_LIBRARY_FUNCTION(wcscpy);
+INTERCEPT_LIBRARY_FUNCTION(wcsncat);
+INTERCEPT_LIBRARY_FUNCTION(wcsncpy);
 INTERCEPT_LIBRARY_FUNCTION(wcslen);
 INTERCEPT_LIBRARY_FUNCTION(wcsnlen);
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index de55c736d0e144..417355f5f16ea7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -498,7 +498,7 @@
 #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD)
 #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_WCSLEN 1
-#define SANITIZER_INTERCEPT_WCSCAT SI_POSIX
+#define SANITIZER_INTERCEPT_WCSCAT 1
 #define SANITIZER_INTERCEPT_WCSDUP SI_POSIX
 #define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA)
 #define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID
diff --git a/compiler-rt/test/asan/TestCases/wcscat.cpp b/compiler-rt/test/asan/TestCases/wcscat.cpp
new file mode 100644
index 00000000000000..c870e4c1e95b46
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscat.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+  const wchar_t *start = L"X means ";  // 8 wide characters plus terminator
+  const wchar_t *append = L"dog";
+  wchar_t goodDst[12];  // exactly fits "X means dog" plus terminator
+  wcscpy(goodDst, start);
+  wcscat(goodDst, append);
+
+  wchar_t badDst[9];  // fits `start` only, so appending must overflow
+  wcscpy(badDst, start);
+  printf("Good so far.\n");
+  // CHECK: Good so far.
+  wcscat(badDst, append); // Boom!
+  // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+  // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+  // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcscat {{.*[/\\]}}sanitizer_common_interceptors.inc:{{[0-9]+}}
+  // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*[/\\]}}TestCases{{[/\\]}}wcscat.cpp:22
+  // CHECK: This frame has 2 object(s):
+  // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+  // CHECK: (longjmp{{(, SEH)?}} and C++ exceptions *are* supported)
+  // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+  // CHECK: Shadow bytes around the buggy address:
+  // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+  // CHECK-NEXT: Addressable:           00
+  // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+  // CHECK-NEXT: Heap left redzone:       fa
+  // CHECK-NEXT: Freed heap region:       fd
+  // CHECK-NEXT: Stack left redzone:      f1
+  // CHECK-NEXT: Stack mid redzone:       f2
+  // CHECK-NEXT: Stack right redzone:     f3
+  // CHECK-NEXT: Stack after return:      f5
+  // CHECK-NEXT: Stack use after scope:   f8
+  // CHECK-NEXT: Global redzone:          f9
+  // CHECK-NEXT: Global init order:       f6
+  // CHECK-NEXT: Poisoned by user:        f7
+  // CHECK-NEXT: Container overflow:      fc
+  // CHECK-NEXT: Array cookie:            ac
+  // CHECK-NEXT: Intra object redzone:    bb
+  // CHECK-NEXT: ASan internal:           fe
+  // CHECK-NEXT: Left alloca redzone:     ca
+  // CHECK-NEXT: Right alloca redzone:    cb
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcscpy.cpp b/compiler-rt/test/asan/TestCases/wcscpy.cpp
new file mode 100644
index 00000000000000..9ae4d8b9418b93
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcscpy.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+  const wchar_t *src = L"X means dog";  // 11 wide characters plus terminator
+  wchar_t goodDst[12];  // exactly fits `src` plus terminator
+  wcscpy(goodDst, src);
+
+  wchar_t badDst[7];  // too small for `src`, so the copy must overflow
+  printf("Good so far.\n");
+  // CHECK: Good so far.
+
+  wcscpy(badDst, src); // Boom!
+  // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+  // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+  // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcscpy {{.*[/\\]}}asan_interceptors.cpp:{{[0-9]+}}
+  // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*[/\\]}}TestCases{{[/\\]}}wcscpy.cpp:20
+  // CHECK: This frame has 2 object(s):
+  // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+  // CHECK: (longjmp{{(, SEH)?}} and C++ exceptions *are* supported)
+  // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+  // CHECK: Shadow bytes around the buggy address:
+  // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+  // CHECK-NEXT: Addressable:           00
+  // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+  // CHECK-NEXT: Heap left redzone:       fa
+  // CHECK-NEXT: Freed heap region:       fd
+  // CHECK-NEXT: Stack left redzone:      f1
+  // CHECK-NEXT: Stack mid redzone:       f2
+  // CHECK-NEXT: Stack right redzone:     f3
+  // CHECK-NEXT: Stack after return:      f5
+  // CHECK-NEXT: Stack use after scope:   f8
+  // CHECK-NEXT: Global redzone:          f9
+  // CHECK-NEXT: Global init order:       f6
+  // CHECK-NEXT: Poisoned by user:        f7
+  // CHECK-NEXT: Container overflow:      fc
+  // CHECK-NEXT: Array cookie:            ac
+  // CHECK-NEXT: Intra object redzone:    bb
+  // CHECK-NEXT: ASan internal:           fe
+  // CHECK-NEXT: Left alloca redzone:     ca
+  // CHECK-NEXT: Right alloca redzone:    cb
+  printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncat.cpp b/compiler-rt/test/asan/TestCases/wcsncat.cpp
new file mode 100644
index 00000000000000..81c6872b7ec489
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncat.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+  const wchar_t *start = L"X means ";  // 8 wide characters plus terminator
+  const wchar_t *append = L"dog";
+  wchar_t goodDst[15];  // room for "X means dog" plus terminator with slack
+  wcscpy(goodDst, start);
+  wcsncat(goodDst, append, 5);
+
+  wchar_t badDst[11];  // fits `start` plus one appended character only
+  wcscpy(badDst, start);
+  wcsncat(badDst, append, 1);  // 9 characters plus terminator still fits
+  printf("Good so far.\n");
+  // CHECK: Good so far.
+  wcsncat(badDst, append, 3); // Boom!
+  // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+  // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+  // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcsncat {{.*[/\\]}}sanitizer_common_interceptors.inc:{{[0-9]+}}
+  // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*[/\\]}}TestCases{{[/\\]}}wcsncat.cpp:23
+  // CHECK: This frame has 2 object(s):
+  // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+  // CHECK: (longjmp{{(, SEH)?}} and C++ exceptions *are* supported)
+  // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+  // CHECK: Shadow bytes around the buggy address:
+  // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+  // CHECK-NEXT: Addressable:           00
+  // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+  // CHECK-NEXT: Heap left redzone:       fa
+  // CHECK-NEXT: Freed heap region:       fd
+  // CHECK-NEXT: Stack left redzone:      f1
+  // CHECK-NEXT: Stack mid redzone:       f2
+  // CHECK-NEXT: Stack right redzone:     f3
+  // CHECK-NEXT: Stack after return:      f5
+  // CHECK-NEXT: Stack use after scope:   f8
+  // CHECK-NEXT: Global redzone:          f9
+  // CHECK-NEXT: Global init order:       f6
+  // CHECK-NEXT: Poisoned by user:        f7
+  // CHECK-NEXT: Container overflow:      fc
+  // CHECK-NEXT: Array cookie:            ac
+  // CHECK-NEXT: Intra object redzone:    bb
+  // CHECK-NEXT: ASan internal:           fe
+  // CHECK-NEXT: Left alloca redzone:     ca
+  // CHECK-NEXT: Right alloca redzone:    cb
+}
\ No newline at end of file
diff --git a/compiler-rt/test/asan/TestCases/wcsncpy.cpp b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
new file mode 100644
index 00000000000000..0d0c9a142e418d
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/wcsncpy.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cl_asan -Od -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_cl_asan -O2 -Zi %s -Fe%t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <wchar.h>
+
+int main() {
+  const wchar_t *src = L"X means dog";  // 11 wide characters plus terminator
+  wchar_t goodDst[12];  // exactly fits `src` plus terminator
+  wcsncpy(goodDst, src, 12);
+
+  wchar_t badDst[7];
+  wcsncpy(badDst, src, 7); // This should still work.
+  printf("Good so far.\n");
+  // CHECK: Good so far.
+
+  wcsncpy(badDst, src, 15); // Boom!
+  // CHECK:ERROR: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]] at pc {{0x[0-9a-f]+}} bp {{0x[0-9a-f]+}} sp {{0x[0-9a-f]+}}
+  // CHECK: WRITE of size {{[0-9]+}} at [[ADDR:0x[0-9a-f]+]] thread T0
+  // CHECK: #0 [[ADDR:0x[0-9a-f]+]] in wcsncpy {{.*[/\\]}}asan_interceptors.cpp:{{[0-9]+}}
+  // CHECK: #1 [[ADDR:0x[0-9a-f]+]] in main {{.*[/\\]}}TestCases{{[/\\]}}wcsncpy.cpp:21
+  // CHECK: This frame has 2 object(s):
+  // CHECK: HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
+  // CHECK: (longjmp{{(, SEH)?}} and C++ exceptions *are* supported)
+  // CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow {{.*}} in main
+  // CHECK: Shadow bytes around the buggy address:
+  // CHECK: Shadow byte legend (one shadow byte represents 8 application bytes):
+  // CHECK-NEXT: Addressable:           00
+  // CHECK-NEXT: Partially addressable: 01 02 03 04 05 06 07
+  // CHECK-NEXT: Heap left redzone:       fa
+  // CHECK-NEXT: Freed heap region:       fd
+  // CHECK-NEXT: Stack left redzone:      f1
+  // CHECK-NEXT: Stack mid redzone:       f2
+  // CHECK-NEXT: Stack right redzone:     f3
+  // CHECK-NEXT: Stack after return:      f5
+  // CHECK-NEXT: Stack use after scope:   f8
+  // CHECK-NEXT: Global redzone:          f9
+  // CHECK-NEXT: Global init order:       f6
+  // CHECK-NEXT: Poisoned by user:        f7
+  // CHECK-NEXT: Container overflow:      fc
+  // CHECK-NEXT: Array cookie:            ac
+  // CHECK-NEXT: Intra object redzone:    bb
+  // CHECK-NEXT: ASan internal:           fe
+  // CHECK-NEXT: Left alloca redzone:     ca
+  // CHECK-NEXT: Right alloca redzone:    cb
+  printf("Should have failed with ASAN error.\n");
+}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/90909


More information about the llvm-commits mailing list