[compiler-rt] ad7e250 - [sanitizer] Intercept glibc 2.38 __isoc23_* functions
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 28 00:49:53 PDT 2023
Author: Fangrui Song
Date: 2023-08-28T00:49:49-07:00
New Revision: ad7e2501000da2494860f06a306dfe8c08cc07c3
URL: https://github.com/llvm/llvm-project/commit/ad7e2501000da2494860f06a306dfe8c08cc07c3
DIFF: https://github.com/llvm/llvm-project/commit/ad7e2501000da2494860f06a306dfe8c08cc07c3.diff
LOG: [sanitizer] Intercept glibc 2.38 __isoc23_* functions
`strtol("0b1", 0, 0)` can be (pre-C23) 0 or (C23) 1.
`sscanf("0b10", "%i", &x)` is similar. glibc 2.38 introduced
`__isoc23_strtol` and `__isoc23_scanf` family functions for binary
compatibility.
When `_ISOC2X_SOURCE` is defined (implied by `_GNU_SOURCE`) or
`__STDC_VERSION__ > 201710L`, `__GLIBC_USE_ISOC2X` is defined to 1 and
these `__isoc23_*` symbols are used.
Add `__isoc23_` versions for the following interceptors:
* sanitizer_common_interceptors.inc implements strtoimax/strtoumax.
Remove incorrect FIXME about https://github.com/google/sanitizers/issues/321
* asan_interceptors.cpp implements just strtol and strtoll. The default
`replace_str` mode checks `nptr` is readable and `endptr` is writable.
atoi reuses the existing strtol interceptor.
* msan_interceptors.cpp implements strtol family functions and their
`_l` versions. Tested by lib/msan/tests/msan_test.cpp
* sanitizer_common_interceptors.inc implements scanf family functions.
The strtol family functions are spreaded, which is not great, but the
patch (intended for release/17.x) does not attempt to address the issue.
Add symbols to lib/sanitizer_common/symbolizer/scripts/global_symbols.txt to
support both glibc pre-2.38 and 2.38.
When build bots migrate to glibc 2.38+, we will lose test coverage for
non-isoc23 versions since the existing C++ unittests imply `_GNU_SOURCE`.
Add test/sanitizer_common/TestCases/{strtol.c,scanf.c}.
They catch msan false positive in the absence of the interceptors.
Fix https://github.com/llvm/llvm-project/issues/64388
Fix https://github.com/llvm/llvm-project/issues/64946
Link: https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00010.html
("The GNU C Library version 2.38 is now available")
Reviewed By: #sanitizers, vitalybuka, mgorny
Differential Revision: https://reviews.llvm.org/D158943
Added:
compiler-rt/test/sanitizer_common/TestCases/scanf.c
compiler-rt/test/sanitizer_common/TestCases/strtol.c
Modified:
compiler-rt/lib/asan/asan_interceptors.cpp
compiler-rt/lib/msan/msan_interceptors.cpp
compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
Removed:
################################################################################
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index a9f891ef24af8d..fc7c5b7b8fbfad 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -602,19 +602,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
return REAL(strncpy)(to, from, size);
}
-INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) {
- void *ctx;
- ASAN_INTERCEPTOR_ENTER(ctx, strtol);
- ENSURE_ASAN_INITED();
- if (!flags()->replace_str) {
- return REAL(strtol)(nptr, endptr, base);
- }
+template <typename Fn>
+static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
+ char **endptr, int base)
+ -> decltype(real(nullptr, nullptr, 0)) {
+ if (!flags()->replace_str)
+ return real(nptr, endptr, base);
char *real_endptr;
- long result = REAL(strtol)(nptr, &real_endptr, base);
+ auto res = real(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
- return result;
+ return res;
}
+# define INTERCEPTOR_STRTO_BASE(ret_type, func) \
+ INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \
+ void *ctx; \
+ ASAN_INTERCEPTOR_ENTER(ctx, func); \
+ ENSURE_ASAN_INITED(); \
+ return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \
+ }
+
+INTERCEPTOR_STRTO_BASE(long, strtol)
+INTERCEPTOR_STRTO_BASE(long long, strtoll)
+
+# if SANITIZER_GLIBC
+INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol)
+INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll)
+# endif
+
INTERCEPTOR(int, atoi, const char *nptr) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, atoi);
@@ -653,19 +668,6 @@ INTERCEPTOR(long, atol, const char *nptr) {
return result;
}
-INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) {
- void *ctx;
- ASAN_INTERCEPTOR_ENTER(ctx, strtoll);
- ENSURE_ASAN_INITED();
- if (!flags()->replace_str) {
- return REAL(strtoll)(nptr, endptr, base);
- }
- char *real_endptr;
- long long result = REAL(strtoll)(nptr, &real_endptr, base);
- StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
- return result;
-}
-
INTERCEPTOR(long long, atoll, const char *nptr) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, atoll);
@@ -766,6 +768,10 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(atoll);
ASAN_INTERCEPT_FUNC(strtol);
ASAN_INTERCEPT_FUNC(strtoll);
+# if SANITIZER_GLIBC
+ ASAN_INTERCEPT_FUNC(__isoc23_strtol);
+ ASAN_INTERCEPT_FUNC(__isoc23_strtoll);
+# endif
// Intecept jump-related functions.
ASAN_INTERCEPT_FUNC(longjmp);
diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index f5e0d3cb9a6733..9cb65d55372ff5 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t)
+#if SANITIZER_GLIBC
+INTERCEPTORS_STRTO(double, __isoc23_strtod, char)
+INTERCEPTORS_STRTO(float, __isoc23_strtof, char)
+INTERCEPTORS_STRTO(long double, __isoc23_strtold, char)
+INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char)
+INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char)
+INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char)
+INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char)
+INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char)
+
+INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t)
+INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t)
+INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t)
+INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t)
+INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t)
+INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t)
+INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t)
+#endif
+
#if SANITIZER_NETBSD
#define INTERCEPT_STRTO(func) \
INTERCEPT_FUNCTION(func); \
@@ -1748,6 +1767,24 @@ void InitializeInterceptors() {
INTERCEPT_STRTO(wcstoul);
INTERCEPT_STRTO(wcstoll);
INTERCEPT_STRTO(wcstoull);
+#ifdef SANITIZER_GLIBC
+ INTERCEPT_STRTO(__isoc23_strtod);
+ INTERCEPT_STRTO(__isoc23_strtof);
+ INTERCEPT_STRTO(__isoc23_strtold);
+ INTERCEPT_STRTO(__isoc23_strtol);
+ INTERCEPT_STRTO(__isoc23_strtoul);
+ INTERCEPT_STRTO(__isoc23_strtoll);
+ INTERCEPT_STRTO(__isoc23_strtoull);
+ INTERCEPT_STRTO(__isoc23_strtouq);
+ INTERCEPT_STRTO(__isoc23_wcstod);
+ INTERCEPT_STRTO(__isoc23_wcstof);
+ INTERCEPT_STRTO(__isoc23_wcstold);
+ INTERCEPT_STRTO(__isoc23_wcstol);
+ INTERCEPT_STRTO(__isoc23_wcstoul);
+ INTERCEPT_STRTO(__isoc23_wcstoll);
+ INTERCEPT_STRTO(__isoc23_wcstoull);
+#endif
+
#ifdef SANITIZER_NLDBL_VERSION
INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION);
INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 299561b3ad3a13..0e563fa12022a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
+
+INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap)
+
+INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format,
+ va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap)
+
+INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap)
+VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap)
#endif // SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, scanf, const char *format, ...)
@@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
+
+INTERCEPTOR(int, __isoc23_scanf, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format)
+
+INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format)
+
+INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...)
+FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format)
#endif
#endif
@@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \
- COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf);
+ COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_scanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf);
#else
#define INIT_ISOC99_SCANF
#endif
@@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr,
(real_endptr - nptr) + 1 : 0);
}
-
#if SANITIZER_INTERCEPT_STRTOIMAX
-INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
- void *ctx;
- COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
- // FIXME: under ASan the call below may write to freed memory and corrupt
- // its metadata. See
- // https://github.com/google/sanitizers/issues/321.
+template <typename Fn>
+static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr,
+ char **endptr, int base)
+ -> decltype(real(nullptr, nullptr, 0)) {
char *real_endptr;
- INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base);
+ auto res = real(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
+INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
+ return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base);
+}
INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
- // FIXME: under ASan the call below may write to freed memory and corrupt
- // its metadata. See
- // https://github.com/google/sanitizers/issues/321.
- char *real_endptr;
- UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base);
- StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
- return res;
+ return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base);
}
#define INIT_STRTOIMAX \
@@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
#define INIT_STRTOIMAX
#endif
+#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC
+INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base);
+ return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base);
+}
+INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base);
+ return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base);
+}
+
+# define INIT_STRTOIMAX_C23 \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \
+ COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax);
+#else
+# define INIT_STRTOIMAX_C23
+#endif
+
#if SANITIZER_INTERCEPT_MBSTOWCS
INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
void *ctx;
@@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() {
INIT_GETCWD;
INIT_GET_CURRENT_DIR_NAME;
INIT_STRTOIMAX;
+ INIT_STRTOIMAX_C23;
INIT_MBSTOWCS;
INIT_MBSNRTOWCS;
INIT_WCSTOMBS;
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index 911d8fbbab2dde..b213287915d104 100644
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -36,6 +36,13 @@ __interceptor_pthread_setspecific w
__interceptor_read w
__interceptor_realpath w
__isinf U
+__isoc23_sscanf U
+__isoc23_strtol U
+__isoc23_strtoll U
+__isoc23_strtoll_l U
+__isoc23_strtoull U
+__isoc23_strtoull_l U
+__isoc23_vsscanf U
__isoc99_sscanf U
__isoc99_vsscanf U
__lshrdi3 U
diff --git a/compiler-rt/test/sanitizer_common/TestCases/scanf.c b/compiler-rt/test/sanitizer_common/TestCases/scanf.c
new file mode 100644
index 00000000000000..a7f35c2af57eea
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/scanf.c
@@ -0,0 +1,24 @@
+// RUN: %clang -std=c17 %s -o %t && %run %t
+/// Test __isoc23_* for glibc 2.38+.
+// RUN: %clang -std=c23 %s -o %t && %run %t
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+int test_vsscanf(const char *buf, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ int ret = vsscanf(buf, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+int main(int argc, char **argv) {
+ int x, y;
+ assert(sscanf("42", "%d", &x) == 1);
+ assert(x == 42);
+ assert(test_vsscanf("42", "%d", &y) == 1);
+ assert(y == 42);
+ return 0;
+}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/strtol.c b/compiler-rt/test/sanitizer_common/TestCases/strtol.c
new file mode 100644
index 00000000000000..9947cdeacd8c31
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/strtol.c
@@ -0,0 +1,61 @@
+// RUN: %clang -std=c17 %s -o %t && %run %t
+/// Test __isoc23_* for glibc 2.38+.
+// RUN: %clang -std=c23 %s -o %t && %run %t
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#define TESTL(func) \
+ { \
+ char *end; \
+ long l = (long)func("42", &end, 0); \
+ assert(l == 42); \
+ assert(*end == '\0'); \
+ }
+
+#define TESTF(func) \
+ { \
+ char *end; \
+ long l = (long)func("42", &end); \
+ assert(l == 42); \
+ assert(*end == '\0'); \
+ }
+
+#define WTESTL(func) \
+ { \
+ wchar_t *end; \
+ long l = (long)func(L"42", &end, 0); \
+ assert(l == 42); \
+ assert(*end == L'\0'); \
+ }
+
+#define WTESTF(func) \
+ { \
+ wchar_t *end; \
+ long l = (long)func(L"42", &end); \
+ assert(l == 42); \
+ assert(*end == '\0'); \
+ }
+
+int main() {
+ TESTL(strtol);
+ TESTL(strtoll);
+ TESTL(strtoimax);
+ TESTL(strtoul);
+ TESTL(strtoull);
+ TESTL(strtoumax);
+ TESTF(strtof);
+ TESTF(strtod);
+ TESTF(strtold);
+
+ WTESTL(wcstol);
+ WTESTL(wcstoll);
+ WTESTL(wcstoul);
+ WTESTL(wcstoull);
+ WTESTF(wcstof);
+ WTESTF(wcstod);
+ WTESTF(wcstold);
+}
More information about the llvm-commits
mailing list