[compiler-rt] ad294e5 - [sanitizers] Fix interception of GLibc regexec

Alex Richardson via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 8 02:54:14 PST 2021


Author: Alex Richardson
Date: 2021-03-08T10:53:55Z
New Revision: ad294e572bc5c16f9dc420cc994322de6ca3fbfb

URL: https://github.com/llvm/llvm-project/commit/ad294e572bc5c16f9dc420cc994322de6ca3fbfb
DIFF: https://github.com/llvm/llvm-project/commit/ad294e572bc5c16f9dc420cc994322de6ca3fbfb.diff

LOG: [sanitizers] Fix interception of GLibc regexec

Previously, on GLibc systems, the interceptor was calling __compat_regexec
(regexec at GLIBC_2.2.5) insead of the newer __regexec (regexec at GLIBC_2.3.4).
The __compat_regexec strips the REG_STARTEND flag but does not report an
error if other flags are present. This can result in infinite loops for
programs that use REG_STARTEND to find all matches inside a buffer (since
ignoring REG_STARTEND means that the search always starts from the first
character).

The underlying issue is that GLibc's dlsym(RTLD_NEXT, ...) appears to
always return the oldest versioned symbol instead of the default. This
means it does not match the behaviour of dlsym(RTLD_DEFAULT, ...) or the
behaviour documented in the manpage.

It appears a similar issue was encountered with realpath and worked around
in 77ef78a0a5dbaa364529bd05ed7a7bd9a71dd8d4.

See also https://sourceware.org/bugzilla/show_bug.cgi?id=14932 and
https://sourceware.org/bugzilla/show_bug.cgi?id=1319.

Fixes https://github.com/google/sanitizers/issues/1371

Reviewed By: #sanitizers, vitalybuka, marxin

Differential Revision: https://reviews.llvm.org/D96348

Added: 
    compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp

Modified: 
    compiler-rt/lib/asan/asan_interceptors.cpp
    compiler-rt/lib/asan/asan_interceptors.h
    compiler-rt/lib/memprof/memprof_interceptors.cpp
    compiler-rt/lib/memprof/memprof_interceptors.h
    compiler-rt/lib/msan/msan_interceptors.cpp
    compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
    compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index cd07d51878b1..9db7db89fa1a 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -90,8 +90,10 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
   (void) ctx;                                                                  \
 
 #define COMMON_INTERCEPT_FUNCTION(name) ASAN_INTERCEPT_FUNC(name)
-#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
   ASAN_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+  ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
   ASAN_WRITE_RANGE(ctx, ptr, size)
 #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
@@ -672,6 +674,7 @@ void InitializeAsanInterceptors() {
 
   // Intercept threading-related functions
 #if ASAN_INTERCEPT_PTHREAD_CREATE
+// TODO: this should probably have an unversioned fallback for newer arches?
 #if defined(ASAN_PTHREAD_CREATE_VERSION)
   ASAN_INTERCEPT_FUNC_VER(pthread_create, ASAN_PTHREAD_CREATE_VERSION);
 #else

diff  --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index 45cdb80b1b64..e8c58c2dc6b6 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -145,6 +145,13 @@ DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
       VReport(1, "AddressSanitizer: failed to intercept '%s@@%s'\n", #name, \
               #ver);                                                        \
   } while (0)
+#define ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)              \
+  do {                                                                       \
+    if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name))     \
+      VReport(1, "AddressSanitizer: failed to intercept '%s@@%s' or '%s'\n", \
+              #name, #ver, #name);                                           \
+  } while (0)
+
 #else
 // OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION.
 #define ASAN_INTERCEPT_FUNC(name)

diff  --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index caa629b9c474..e22768061e70 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -59,6 +59,8 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
 #define COMMON_INTERCEPT_FUNCTION(name) MEMPROF_INTERCEPT_FUNC(name)
 #define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                               \
   MEMPROF_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver)          \
+  MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                         \
   MEMPROF_WRITE_RANGE(ptr, size)
 #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size)                          \

diff  --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h
index b6a4fa411254..ca5f3690430a 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.h
+++ b/compiler-rt/lib/memprof/memprof_interceptors.h
@@ -50,5 +50,11 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
     if (!INTERCEPT_FUNCTION_VER(name, ver))                                    \
       VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver);  \
   } while (0)
+#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)             \
+  do {                                                                         \
+    if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name))       \
+      VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \
+              #ver, #name);                                                    \
+  } while (0)
 
 #endif // MEMPROF_INTERCEPTORS_H

diff  --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index 4eea94f1f969..529bf22f05b9 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -1257,10 +1257,18 @@ int OnExit() {
       VReport(1, "MemorySanitizer: failed to intercept '%s@@%s'\n", #name, \
               #ver);                                                       \
   } while (0)
+#define MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)             \
+  do {                                                                      \
+    if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name))    \
+      VReport(1, "MemorySanitizer: failed to intercept '%s@@%s' or '%s'\n", \
+              #name, #ver, #name);                                          \
+  } while (0)
 
 #define COMMON_INTERCEPT_FUNCTION(name) MSAN_INTERCEPT_FUNC(name)
-#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
   MSAN_INTERCEPT_FUNC_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+  MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
 #define COMMON_INTERCEPTOR_UNPOISON_PARAM(count)  \
   UnpoisonParam(count)
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index f1cf51f5b2bf..5314b8e771c9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -239,6 +239,23 @@ extern const short *_tolower_tab_;
     COMMON_INTERCEPT_FUNCTION(fn)
 #endif
 
+#if SANITIZER_GLIBC
+// If we could not find the versioned symbol, fall back to an unversioned
+// lookup. This is needed to work around a GLibc bug that causes dlsym
+// with RTLD_NEXT to return the oldest versioned symbol.
+// See https://sourceware.org/bugzilla/show_bug.cgi?id=14932.
+// For certain symbols (e.g. regexec) we have to perform a versioned lookup,
+// but that versioned symbol will only exist for architectures where the
+// oldest Glibc version pre-dates support for that architecture.
+// For example, regexec at GLIBC_2.3.4 exists on x86_64, but not RISC-V.
+// See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98920.
+#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
+  COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(fn, ver)
+#else
+#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
+  COMMON_INTERCEPT_FUNCTION(fn)
+#endif
+
 #ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
 #define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
   {                                                       \
@@ -7781,7 +7798,7 @@ INTERCEPTOR(void, regfree, const void *preg) {
 }
 #define INIT_REGEX                                                             \
   COMMON_INTERCEPT_FUNCTION(regcomp);                                          \
-  COMMON_INTERCEPT_FUNCTION(regexec);                                          \
+  COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(regexec, "GLIBC_2.3.4");             \
   COMMON_INTERCEPT_FUNCTION(regerror);                                         \
   COMMON_INTERCEPT_FUNCTION(regfree);
 #else

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index b8b964cda58b..a80e24b87373 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2263,6 +2263,8 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
 #define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
 #define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
   INTERCEPT_FUNCTION_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+  (INTERCEPT_FUNCTION_VER(name, ver) || INTERCEPT_FUNCTION(name))
 
 #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                    \
   MemoryAccessRange(((TsanInterceptorContext *)ctx)->thr,                 \

diff  --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp
new file mode 100644
index 000000000000..1a445783bcbe
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp
@@ -0,0 +1,61 @@
+// RUN: %clangxx -O0 -g %s -o %t && %run %t
+/// Check that REG_STARTEND is handled correctly.
+/// This is a regression test for https://github.com/google/sanitizers/issues/1371
+/// Previously, on GLibc systems, the interceptor was calling __compat_regexec
+/// (regexec at GLIBC_2.2.5) insead of the newer __regexec (regexec at GLIBC_2.3.4).
+/// The __compat_regexec strips the REG_STARTEND flag but does not report an error
+/// if other flags are present. This can result in infinite loops for programs that
+/// use REG_STARTEND to find all matches inside a buffer (since ignoring REG_STARTEND
+/// means that the search always starts from the first character).
+
+#include <assert.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void test_matched(const regex_t *preg, const char *string, size_t start,
+                  size_t end, const char *expected) {
+  regmatch_t match[1];
+  match[0].rm_so = start;
+  match[0].rm_eo = end;
+  int rv = regexec(preg, string, 1, match, REG_STARTEND);
+  int matchlen = (int)(match[0].rm_eo - match[0].rm_so);
+  const char *matchstart = string + match[0].rm_so;
+  if (rv == 0) {
+    if (expected == nullptr) {
+      fprintf(stderr, "ERROR: expected no match but got '%.*s'\n",
+              matchlen, matchstart);
+      abort();
+    } else if ((size_t)matchlen != strlen(expected) ||
+               memcmp(matchstart, expected, strlen(expected)) != 0) {
+      fprintf(stderr, "ERROR: expected '%s' match but got '%.*s'\n",
+              expected, matchlen, matchstart);
+      abort();
+    }
+  } else if (rv == REG_NOMATCH) {
+    if (expected != nullptr) {
+      fprintf(stderr, "ERROR: expected '%s' match but got no match\n", expected);
+      abort();
+    }
+  } else {
+    printf("ERROR: unexpected regexec return value %d\n", rv);
+    abort();
+  }
+}
+
+int main(void) {
+  regex_t regex;
+  int rv = regcomp(&regex, "[A-Z][A-Z]", 0);
+  assert(!rv);
+  test_matched(&regex, "ABCD", 0, 4, "AB");
+  test_matched(&regex, "ABCD", 0, 1, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 1, 4, "BC");
+  test_matched(&regex, "ABCD", 1, 2, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 2, 4, "CD");
+  test_matched(&regex, "ABCD", 2, 3, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 3, 4, nullptr); // Not long enough
+  regfree(&regex);
+  printf("Successful test\n");
+  return 0;
+}


        


More information about the llvm-commits mailing list