[llvm-branch-commits] [compiler-rt] dd230ef - [sanitizer] Intercept glibc 2.38 __isoc23_* functions

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 30 08:08:30 PDT 2023


Author: Fangrui Song
Date: 2023-08-30T17:00:18+02:00
New Revision: dd230efe703f34678ce52280e50238abf908aaa1

URL: https://github.com/llvm/llvm-project/commit/dd230efe703f34678ce52280e50238abf908aaa1
DIFF: https://github.com/llvm/llvm-project/commit/dd230efe703f34678ce52280e50238abf908aaa1.diff

LOG: [sanitizer] Intercept glibc 2.38 __isoc23_* functions

`strtol("0b1", 0, 0)` can be (pre-C23) 0 or (C23) 1.
`sscanf("0b10", "%i", &x)` is similar. glibc 2.38 introduced
`__isoc23_strtol` and `__isoc23_scanf` family functions for binary
compatibility.

When `_ISOC2X_SOURCE` is defined (implied by `_GNU_SOURCE`) or
`__STDC_VERSION__ > 201710L`, `__GLIBC_USE_ISOC2X` is defined to 1 and
these `__isoc23_*` symbols are used.

Add `__isoc23_` versions for the following interceptors:

* sanitizer_common_interceptors.inc implements strtoimax/strtoumax.
  Remove incorrect FIXME about https://github.com/google/sanitizers/issues/321
* asan_interceptors.cpp implements just strtol and strtoll. The default
  `replace_str` mode checks `nptr` is readable and `endptr` is writable.
  atoi reuses the existing strtol interceptor.
* msan_interceptors.cpp implements strtol family functions and their
  `_l` versions. Tested by lib/msan/tests/msan_test.cpp
* sanitizer_common_interceptors.inc implements scanf family functions.

The strtol family interceptors are spread across several files, which is not
great, but this patch (intended for release/17.x) does not attempt to address
that.

Add symbols to lib/sanitizer_common/symbolizer/scripts/global_symbols.txt to
support both glibc pre-2.38 and 2.38.

When build bots migrate to glibc 2.38+, we will lose test coverage for the
non-isoc23 versions, since the existing C++ unit tests imply `_GNU_SOURCE`.
To keep both variants covered, add
test/sanitizer_common/TestCases/{strtol.c,scanf.c}.
They catch MSan false positives in the absence of the interceptors.

Fix https://github.com/llvm/llvm-project/issues/64388
Fix https://github.com/llvm/llvm-project/issues/64946

Link: https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00010.html
("The GNU C Library version 2.38 is now available")

Reviewed By: #sanitizers, vitalybuka, mgorny

Differential Revision: https://reviews.llvm.org/D158943

(cherry picked from commit ad7e2501000da2494860f06a306dfe8c08cc07c3)

Added: 
    compiler-rt/test/sanitizer_common/TestCases/scanf.c
    compiler-rt/test/sanitizer_common/TestCases/strtol.c

Modified: 
    compiler-rt/lib/asan/asan_interceptors.cpp
    compiler-rt/lib/msan/msan_interceptors.cpp
    compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
    compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index df879b1fbed12b..5158e99b75e5da 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -588,19 +588,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
   return REAL(strncpy)(to, from, size);
 }
 
-INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, strtol);
-  ENSURE_ASAN_INITED();
-  if (!flags()->replace_str) {
-    return REAL(strtol)(nptr, endptr, base);
-  }
+template <typename Fn>
+static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
+                                     char **endptr, int base)
+    -> decltype(real(nullptr, nullptr, 0)) {
+  if (!flags()->replace_str)
+    return real(nptr, endptr, base);
   char *real_endptr;
-  long result = REAL(strtol)(nptr, &real_endptr, base);
+  auto res = real(nptr, &real_endptr, base);
   StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
-  return result;
+  return res;
 }
 
+#  define INTERCEPTOR_STRTO_BASE(ret_type, func)                             \
+    INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \
+      void *ctx;                                                             \
+      ASAN_INTERCEPTOR_ENTER(ctx, func);                                     \
+      ENSURE_ASAN_INITED();                                                  \
+      return StrtolImpl(ctx, REAL(func), nptr, endptr, base);                \
+    }
+
+INTERCEPTOR_STRTO_BASE(long, strtol)
+INTERCEPTOR_STRTO_BASE(long long, strtoll)
+
+#  if SANITIZER_GLIBC
+INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol)
+INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll)
+#  endif
+
 INTERCEPTOR(int, atoi, const char *nptr) {
   void *ctx;
   ASAN_INTERCEPTOR_ENTER(ctx, atoi);
@@ -639,19 +654,6 @@ INTERCEPTOR(long, atol, const char *nptr) {
   return result;
 }
 
-INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, strtoll);
-  ENSURE_ASAN_INITED();
-  if (!flags()->replace_str) {
-    return REAL(strtoll)(nptr, endptr, base);
-  }
-  char *real_endptr;
-  long long result = REAL(strtoll)(nptr, &real_endptr, base);
-  StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
-  return result;
-}
-
 INTERCEPTOR(long long, atoll, const char *nptr) {
   void *ctx;
   ASAN_INTERCEPTOR_ENTER(ctx, atoll);
@@ -752,6 +754,10 @@ void InitializeAsanInterceptors() {
   ASAN_INTERCEPT_FUNC(atoll);
   ASAN_INTERCEPT_FUNC(strtol);
   ASAN_INTERCEPT_FUNC(strtoll);
+#  if SANITIZER_GLIBC
+  ASAN_INTERCEPT_FUNC(__isoc23_strtol);
+  ASAN_INTERCEPT_FUNC(__isoc23_strtoll);
+#  endif
 
   // Intecept jump-related functions.
   ASAN_INTERCEPT_FUNC(longjmp);

diff  --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index f5e0d3cb9a6733..9cb65d55372ff5 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t)
 INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t)
 INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t)
 
+#if SANITIZER_GLIBC
+INTERCEPTORS_STRTO(double, __isoc23_strtod, char)
+INTERCEPTORS_STRTO(float, __isoc23_strtof, char)
+INTERCEPTORS_STRTO(long double, __isoc23_strtold, char)
+INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char)
+INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char)
+INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char)
+INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char)
+INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char)
+
+INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t)
+INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t)
+INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t)
+INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t)
+INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t)
+INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t)
+INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t)
+#endif
+
 #if SANITIZER_NETBSD
 #define INTERCEPT_STRTO(func) \
   INTERCEPT_FUNCTION(func); \
@@ -1748,6 +1767,24 @@ void InitializeInterceptors() {
   INTERCEPT_STRTO(wcstoul);
   INTERCEPT_STRTO(wcstoll);
   INTERCEPT_STRTO(wcstoull);
+#ifdef SANITIZER_GLIBC
+  INTERCEPT_STRTO(__isoc23_strtod);
+  INTERCEPT_STRTO(__isoc23_strtof);
+  INTERCEPT_STRTO(__isoc23_strtold);
+  INTERCEPT_STRTO(__isoc23_strtol);
+  INTERCEPT_STRTO(__isoc23_strtoul);
+  INTERCEPT_STRTO(__isoc23_strtoll);
+  INTERCEPT_STRTO(__isoc23_strtoull);
+  INTERCEPT_STRTO(__isoc23_strtouq);
+  INTERCEPT_STRTO(__isoc23_wcstod);
+  INTERCEPT_STRTO(__isoc23_wcstof);
+  INTERCEPT_STRTO(__isoc23_wcstold);
+  INTERCEPT_STRTO(__isoc23_wcstol);
+  INTERCEPT_STRTO(__isoc23_wcstoul);
+  INTERCEPT_STRTO(__isoc23_wcstoll);
+  INTERCEPT_STRTO(__isoc23_wcstoull);
+#endif
+
 #ifdef SANITIZER_NLDBL_VERSION
   INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION);
   INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION);

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 299561b3ad3a13..0e563fa12022a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
 
 INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
 VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
+
+INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap)
+
+INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format,
+            va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap)
+
+INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap)
 #endif  // SANITIZER_INTERCEPT_ISOC99_SCANF
 
 INTERCEPTOR(int, scanf, const char *format, ...)
@@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
 
 INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
 FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
+
+INTERCEPTOR(int, __isoc23_scanf, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format)
+
+INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format)
+
+INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format)
 #endif
 
 #endif
@@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
   COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf);  \
   COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf);  \
   COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \
-  COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf);
+  COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_scanf);   \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf);  \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf);  \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf);  \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \
+  COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf);
 #else
 #define INIT_ISOC99_SCANF
 #endif
@@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr,
                                  (real_endptr - nptr) + 1 : 0);
 }
 
-
 #if SANITIZER_INTERCEPT_STRTOIMAX
-INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
-  // FIXME: under ASan the call below may write to freed memory and corrupt
-  // its metadata. See
-  // https://github.com/google/sanitizers/issues/321.
+template <typename Fn>
+static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr,
+                                        char **endptr, int base)
+    -> decltype(real(nullptr, nullptr, 0)) {
   char *real_endptr;
-  INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base);
+  auto res = real(nptr, &real_endptr, base);
   StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
   return res;
 }
 
+INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
+  return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base);
+}
 INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
-  // FIXME: under ASan the call below may write to freed memory and corrupt
-  // its metadata. See
-  // https://github.com/google/sanitizers/issues/321.
-  char *real_endptr;
-  UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base);
-  StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
-  return res;
+  return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base);
 }
 
 #define INIT_STRTOIMAX                  \
@@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
 #define INIT_STRTOIMAX
 #endif
 
+#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC
+INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base);
+  return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base);
+}
+INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base);
+  return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base);
+}
+
+#  define INIT_STRTOIMAX_C23                       \
+    COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \
+    COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax);
+#else
+#  define INIT_STRTOIMAX_C23
+#endif
+
 #if SANITIZER_INTERCEPT_MBSTOWCS
 INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
   void *ctx;
@@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() {
   INIT_GETCWD;
   INIT_GET_CURRENT_DIR_NAME;
   INIT_STRTOIMAX;
+  INIT_STRTOIMAX_C23;
   INIT_MBSTOWCS;
   INIT_MBSNRTOWCS;
   INIT_WCSTOMBS;

diff  --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index 509e3f19fe3868..819cfca44b00bc 100644
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -34,6 +34,13 @@ __interceptor_pthread_setspecific w
 __interceptor_read w
 __interceptor_realpath w
 __isinf U
+__isoc23_sscanf U
+__isoc23_strtol U
+__isoc23_strtoll U
+__isoc23_strtoll_l U
+__isoc23_strtoull U
+__isoc23_strtoull_l U
+__isoc23_vsscanf U
 __isoc99_sscanf U
 __isoc99_vsscanf U
 __moddi3 U

diff  --git a/compiler-rt/test/sanitizer_common/TestCases/scanf.c b/compiler-rt/test/sanitizer_common/TestCases/scanf.c
new file mode 100644
index 00000000000000..a7f35c2af57eea
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/scanf.c
@@ -0,0 +1,24 @@
+// RUN: %clang -std=c17 %s -o %t && %run %t
+/// Test __isoc23_* for glibc 2.38+.
+// RUN: %clang -std=c23 %s -o %t && %run %t
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+int test_vsscanf(const char *buf, const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  int ret = vsscanf(buf, fmt, ap);
+  va_end(ap);
+  return ret;
+}
+
+int main(int argc, char **argv) {
+  int x, y;
+  assert(sscanf("42", "%d", &x) == 1);
+  assert(x == 42);
+  assert(test_vsscanf("42", "%d", &y) == 1);
+  assert(y == 42);
+  return 0;
+}

diff  --git a/compiler-rt/test/sanitizer_common/TestCases/strtol.c b/compiler-rt/test/sanitizer_common/TestCases/strtol.c
new file mode 100644
index 00000000000000..9947cdeacd8c31
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/strtol.c
@@ -0,0 +1,61 @@
+// RUN: %clang -std=c17 %s -o %t && %run %t
+/// Test __isoc23_* for glibc 2.38+.
+// RUN: %clang -std=c23 %s -o %t && %run %t
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#define TESTL(func)                                                            \
+  {                                                                            \
+    char *end;                                                                 \
+    long l = (long)func("42", &end, 0);                                        \
+    assert(l == 42);                                                           \
+    assert(*end == '\0');                                                      \
+  }
+
+#define TESTF(func)                                                            \
+  {                                                                            \
+    char *end;                                                                 \
+    long l = (long)func("42", &end);                                           \
+    assert(l == 42);                                                           \
+    assert(*end == '\0');                                                      \
+  }
+
+#define WTESTL(func)                                                           \
+  {                                                                            \
+    wchar_t *end;                                                              \
+    long l = (long)func(L"42", &end, 0);                                       \
+    assert(l == 42);                                                           \
+    assert(*end == L'\0');                                                     \
+  }
+
+#define WTESTF(func)                                                           \
+  {                                                                            \
+    wchar_t *end;                                                              \
+    long l = (long)func(L"42", &end);                                          \
+    assert(l == 42);                                                           \
+    assert(*end == '\0');                                                      \
+  }
+
+int main() {
+  TESTL(strtol);
+  TESTL(strtoll);
+  TESTL(strtoimax);
+  TESTL(strtoul);
+  TESTL(strtoull);
+  TESTL(strtoumax);
+  TESTF(strtof);
+  TESTF(strtod);
+  TESTF(strtold);
+
+  WTESTL(wcstol);
+  WTESTL(wcstoll);
+  WTESTL(wcstoul);
+  WTESTL(wcstoull);
+  WTESTF(wcstof);
+  WTESTF(wcstod);
+  WTESTF(wcstold);
+}


        


More information about the llvm-branch-commits mailing list