[compiler-rt] ad294e5 - [sanitizers] Fix interception of GLibc regexec
Alex Richardson via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 8 02:54:14 PST 2021
Author: Alex Richardson
Date: 2021-03-08T10:53:55Z
New Revision: ad294e572bc5c16f9dc420cc994322de6ca3fbfb
URL: https://github.com/llvm/llvm-project/commit/ad294e572bc5c16f9dc420cc994322de6ca3fbfb
DIFF: https://github.com/llvm/llvm-project/commit/ad294e572bc5c16f9dc420cc994322de6ca3fbfb.diff
LOG: [sanitizers] Fix interception of GLibc regexec
Previously, on GLibc systems, the interceptor was calling __compat_regexec
(regexec at GLIBC_2.2.5) insead of the newer __regexec (regexec at GLIBC_2.3.4).
The __compat_regexec strips the REG_STARTEND flag but does not report an
error if other flags are present. This can result in infinite loops for
programs that use REG_STARTEND to find all matches inside a buffer (since
ignoring REG_STARTEND means that the search always starts from the first
character).
The underlying issue is that GLibc's dlsym(RTLD_NEXT, ...) appears to
always return the oldest versioned symbol instead of the default. This
means it does not match the behaviour of dlsym(RTLD_DEFAULT, ...) or the
behaviour documented in the manpage.
It appears a similar issue was encountered with realpath and worked around
in 77ef78a0a5dbaa364529bd05ed7a7bd9a71dd8d4.
See also https://sourceware.org/bugzilla/show_bug.cgi?id=14932 and
https://sourceware.org/bugzilla/show_bug.cgi?id=1319.
Fixes https://github.com/google/sanitizers/issues/1371
Reviewed By: #sanitizers, vitalybuka, marxin
Differential Revision: https://reviews.llvm.org/D96348
Added:
compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp
Modified:
compiler-rt/lib/asan/asan_interceptors.cpp
compiler-rt/lib/asan/asan_interceptors.h
compiler-rt/lib/memprof/memprof_interceptors.cpp
compiler-rt/lib/memprof/memprof_interceptors.h
compiler-rt/lib/msan/msan_interceptors.cpp
compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
Removed:
################################################################################
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index cd07d51878b1..9db7db89fa1a 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -90,8 +90,10 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
(void) ctx; \
#define COMMON_INTERCEPT_FUNCTION(name) ASAN_INTERCEPT_FUNC(name)
-#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
ASAN_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+ ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
ASAN_WRITE_RANGE(ctx, ptr, size)
#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
@@ -672,6 +674,7 @@ void InitializeAsanInterceptors() {
// Intercept threading-related functions
#if ASAN_INTERCEPT_PTHREAD_CREATE
+// TODO: this should probably have an unversioned fallback for newer arches?
#if defined(ASAN_PTHREAD_CREATE_VERSION)
ASAN_INTERCEPT_FUNC_VER(pthread_create, ASAN_PTHREAD_CREATE_VERSION);
#else
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index 45cdb80b1b64..e8c58c2dc6b6 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -145,6 +145,13 @@ DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
VReport(1, "AddressSanitizer: failed to intercept '%s@@%s'\n", #name, \
#ver); \
} while (0)
+#define ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
+ do { \
+ if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
+ VReport(1, "AddressSanitizer: failed to intercept '%s@@%s' or '%s'\n", \
+ #name, #ver, #name); \
+ } while (0)
+
#else
// OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION.
#define ASAN_INTERCEPT_FUNC(name)
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index caa629b9c474..e22768061e70 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -59,6 +59,8 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
#define COMMON_INTERCEPT_FUNCTION(name) MEMPROF_INTERCEPT_FUNC(name)
#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
MEMPROF_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+ MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
MEMPROF_WRITE_RANGE(ptr, size)
#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h
index b6a4fa411254..ca5f3690430a 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.h
+++ b/compiler-rt/lib/memprof/memprof_interceptors.h
@@ -50,5 +50,11 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
if (!INTERCEPT_FUNCTION_VER(name, ver)) \
VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver); \
} while (0)
+#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
+ do { \
+ if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
+ VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \
+ #ver, #name); \
+ } while (0)
#endif // MEMPROF_INTERCEPTORS_H
diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index 4eea94f1f969..529bf22f05b9 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -1257,10 +1257,18 @@ int OnExit() {
VReport(1, "MemorySanitizer: failed to intercept '%s@@%s'\n", #name, \
#ver); \
} while (0)
+#define MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
+ do { \
+ if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
+ VReport(1, "MemorySanitizer: failed to intercept '%s@@%s' or '%s'\n", \
+ #name, #ver, #name); \
+ } while (0)
#define COMMON_INTERCEPT_FUNCTION(name) MSAN_INTERCEPT_FUNC(name)
-#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
MSAN_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+ MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
#define COMMON_INTERCEPTOR_UNPOISON_PARAM(count) \
UnpoisonParam(count)
#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index f1cf51f5b2bf..5314b8e771c9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -239,6 +239,23 @@ extern const short *_tolower_tab_;
COMMON_INTERCEPT_FUNCTION(fn)
#endif
+#if SANITIZER_GLIBC
+// If we could not find the versioned symbol, fall back to an unversioned
+// lookup. This is needed to work around a GLibc bug that causes dlsym
+// with RTLD_NEXT to return the oldest versioned symbol.
+// See https://sourceware.org/bugzilla/show_bug.cgi?id=14932.
+// For certain symbols (e.g. regexec) we have to perform a versioned lookup,
+// but that versioned symbol will only exist for architectures where the
+// oldest Glibc version pre-dates support for that architecture.
+// For example, regexec at GLIBC_2.3.4 exists on x86_64, but not RISC-V.
+// See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98920.
+#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
+ COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(fn, ver)
+#else
+#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
+ COMMON_INTERCEPT_FUNCTION(fn)
+#endif
+
#ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
{ \
@@ -7781,7 +7798,7 @@ INTERCEPTOR(void, regfree, const void *preg) {
}
#define INIT_REGEX \
COMMON_INTERCEPT_FUNCTION(regcomp); \
- COMMON_INTERCEPT_FUNCTION(regexec); \
+ COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(regexec, "GLIBC_2.3.4"); \
COMMON_INTERCEPT_FUNCTION(regerror); \
COMMON_INTERCEPT_FUNCTION(regfree);
#else
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index b8b964cda58b..a80e24b87373 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2263,6 +2263,8 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
INTERCEPT_FUNCTION_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+ (INTERCEPT_FUNCTION_VER(name, ver) || INTERCEPT_FUNCTION(name))
#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
MemoryAccessRange(((TsanInterceptorContext *)ctx)->thr, \
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp
new file mode 100644
index 000000000000..1a445783bcbe
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp
@@ -0,0 +1,61 @@
+// RUN: %clangxx -O0 -g %s -o %t && %run %t
+/// Check that REG_STARTEND is handled correctly.
+/// This is a regression test for https://github.com/google/sanitizers/issues/1371
+/// Previously, on GLibc systems, the interceptor was calling __compat_regexec
+/// (regexec at GLIBC_2.2.5) insead of the newer __regexec (regexec at GLIBC_2.3.4).
+/// The __compat_regexec strips the REG_STARTEND flag but does not report an error
+/// if other flags are present. This can result in infinite loops for programs that
+/// use REG_STARTEND to find all matches inside a buffer (since ignoring REG_STARTEND
+/// means that the search always starts from the first character).
+
+#include <assert.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void test_matched(const regex_t *preg, const char *string, size_t start,
+ size_t end, const char *expected) {
+ regmatch_t match[1];
+ match[0].rm_so = start;
+ match[0].rm_eo = end;
+ int rv = regexec(preg, string, 1, match, REG_STARTEND);
+ int matchlen = (int)(match[0].rm_eo - match[0].rm_so);
+ const char *matchstart = string + match[0].rm_so;
+ if (rv == 0) {
+ if (expected == nullptr) {
+ fprintf(stderr, "ERROR: expected no match but got '%.*s'\n",
+ matchlen, matchstart);
+ abort();
+ } else if ((size_t)matchlen != strlen(expected) ||
+ memcmp(matchstart, expected, strlen(expected)) != 0) {
+ fprintf(stderr, "ERROR: expected '%s' match but got '%.*s'\n",
+ expected, matchlen, matchstart);
+ abort();
+ }
+ } else if (rv == REG_NOMATCH) {
+ if (expected != nullptr) {
+ fprintf(stderr, "ERROR: expected '%s' match but got no match\n", expected);
+ abort();
+ }
+ } else {
+ printf("ERROR: unexpected regexec return value %d\n", rv);
+ abort();
+ }
+}
+
+int main(void) {
+ regex_t regex;
+ int rv = regcomp(®ex, "[A-Z][A-Z]", 0);
+ assert(!rv);
+ test_matched(®ex, "ABCD", 0, 4, "AB");
+ test_matched(®ex, "ABCD", 0, 1, nullptr); // Not long enough
+ test_matched(®ex, "ABCD", 1, 4, "BC");
+ test_matched(®ex, "ABCD", 1, 2, nullptr); // Not long enough
+ test_matched(®ex, "ABCD", 2, 4, "CD");
+ test_matched(®ex, "ABCD", 2, 3, nullptr); // Not long enough
+ test_matched(®ex, "ABCD", 3, 4, nullptr); // Not long enough
+ regfree(®ex);
+ printf("Successful test\n");
+ return 0;
+}
More information about the llvm-commits
mailing list