[compiler-rt] 9ab590e - [msan] Add bsearch interceptor

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 3 18:39:24 PDT 2021


Author: Vitaly Buka
Date: 2021-08-03T18:39:14-07:00
New Revision: 9ab590e3ebb2f4a652b4cbdd3594848e8fb017fe

URL: https://github.com/llvm/llvm-project/commit/9ab590e3ebb2f4a652b4cbdd3594848e8fb017fe
DIFF: https://github.com/llvm/llvm-project/commit/9ab590e3ebb2f4a652b4cbdd3594848e8fb017fe.diff

LOG: [msan] Add bsearch interceptor

Similar to qsort, bsearch can be called from non-instrumented
code of glibc. When it happends tls for arguments can be in uninitialized
state.

Unlike to qsort, bsearch does not move data, so we don't need to
check or initialize searched memory or key. Intrumented comparator will
do that on it's own.

Differential Revision: https://reviews.llvm.org/D107387

Added: 
    compiler-rt/test/msan/bsearch.cpp
    compiler-rt/test/sanitizer_common/TestCases/Posix/bsearch.cpp

Modified: 
    compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
    compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 3182341efb48b..481a7e4cd1862 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -10035,6 +10035,30 @@ INTERCEPTOR(void, qsort_r, void *base, SIZE_T nmemb, SIZE_T size,
 #define INIT_QSORT_R
 #endif
 
+#if SANITIZER_INTERCEPT_BSEARCH
+typedef int (*bsearch_compar_f)(const void *, const void *);
+static THREADLOCAL bsearch_compar_f bsearch_compar;
+static int wrapped_bsearch_compar(const void *a, const void *b) {
+  COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
+  return bsearch_compar(a, b);
+}
+
+INTERCEPTOR(void *, bsearch, const void *key, const void *base, SIZE_T nmemb,
+            SIZE_T size, bsearch_compar_f compar) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, bsearch, key, base, nmemb, size, compar);
+  // Unlike qsort, don't expect recursive implementation of bsearch.
+  CHECK_NE(compar, wrapped_bsearch_compar);
+  Swap(bsearch_compar, compar);
+  void *r = REAL(bsearch)(key, base, nmemb, size, wrapped_bsearch_compar);
+  bsearch_compar = compar;
+  return r;
+}
+#  define INIT_BSEARCH COMMON_INTERCEPT_FUNCTION(bsearch)
+#else
+#  define INIT_BSEARCH
+#endif
+
 #if SANITIZER_INTERCEPT_SIGALTSTACK
 INTERCEPTOR(int, sigaltstack, void *ss, void *oss) {
   void *ctx;
@@ -10401,6 +10425,7 @@ static void InitializeCommonInterceptors() {
   INIT_GETENTROPY;
   INIT_QSORT;
   INIT_QSORT_R;
+  INIT_BSEARCH;
   INIT_SIGALTSTACK;
   INIT_UNAME;
   INIT___XUNAME;

diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index fabeabf8f00bf..a65a39afe9f8b 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -571,6 +571,8 @@
 #define SANITIZER_INTERCEPT_QSORT \
   (SI_POSIX && !SI_IOSSIM && !SI_WATCHOS && !SI_TVOS && !SI_ANDROID)
 #define SANITIZER_INTERCEPT_QSORT_R SI_GLIBC
+#define SANITIZER_INTERCEPT_BSEARCH \
+  (SI_POSIX && !SI_IOSSIM && !SI_WATCHOS && !SI_TVOS && !SI_ANDROID)
 // sigaltstack on i386 macOS cannot be intercepted due to setjmp()
 // calling it and assuming that it does not clobber registers.
 #define SANITIZER_INTERCEPT_SIGALTSTACK \

diff  --git a/compiler-rt/test/msan/bsearch.cpp b/compiler-rt/test/msan/bsearch.cpp
new file mode 100644
index 0000000000000..bf2526ce4228b
--- /dev/null
+++ b/compiler-rt/test/msan/bsearch.cpp
@@ -0,0 +1,48 @@
+// __NO_INLINE__ is defined so bsearch needs interceptor.
+// RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
+// RUN: %clangxx_msan -DPOISON_DATA -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_msan -DPOISON_KEY -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// __NO_INLINE__ is undefined so bsearch should be inlined and instrumented and still work as expected.
+// RUN: %clangxx_msan -O2 -g %s -o %t && %run %t
+// RUN: %clangxx_msan -DPOISON_DATA -O2 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_msan -DPOISON_KEY -O2 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sanitizer/msan_interface.h>
+
+long z;
+
+__attribute__((noinline, optnone)) void
+poison_msan_param_tls(long a, long b, long c, long d, long e, long f) {
+  z = a + b + c + d + e + f;
+}
+
+static int compar(const void *a, const void *b) {
+  int r = *(const long *)a - *(const long *)b;
+  long x;
+  __msan_poison(&x, sizeof(x));
+  poison_msan_param_tls(x, x, x, x, x, x);
+  return r;
+}
+
+int main(int argc, char *argv[]) {
+  constexpr size_t SZ = 27;
+  long p[SZ + 1];
+  for (int i = 0; i < SZ + 1; ++i)
+    p[i] = i;
+  p[SZ] = SZ / 3;
+#if defined(POISON_DATA)
+  __msan_poison(p, sizeof(long) * SZ / 2);
+#elif defined(POISON_KEY)
+  __msan_poison(p + SZ, sizeof(long));
+#endif
+  const long *r = (const long *)bsearch(p + SZ, p, SZ, sizeof(long), compar);
+  // CHECK: MemorySanitizer: use-of-uninitialized-value
+
+  assert(r == p + SZ / 3);
+
+  return 0;
+}

diff  --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/bsearch.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/bsearch.cpp
new file mode 100644
index 0000000000000..d8b16388b4ac5
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/bsearch.cpp
@@ -0,0 +1,55 @@
+// Check interceptor.
+// RUN: %clangxx -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Inlined bsearch works even without interceptors.
+// RUN: %clangxx -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static int arr1[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+static int arr2[] = {10, 1, 1, 3, 4, 6, 7, 7};
+
+#define array_size(x) (sizeof(x) / sizeof(x[0]))
+
+static int cmp_ints(const void *a, const void *b) {
+  return *(const int *)b - *(const int *)a;
+}
+
+static int cmp_pos(const void *a, const void *b) {
+  const int *ap =
+      (const int *)bsearch(a, arr1, array_size(arr1), sizeof(int), &cmp_ints);
+  if (!ap)
+    ap = arr1 + array_size(arr1);
+  const int *bp =
+      (const int *)bsearch(b, arr1, array_size(arr1), sizeof(int), &cmp_ints);
+  if (!bp)
+    bp = arr1 + array_size(arr1);
+  return bp - ap;
+}
+
+int main() {
+  // Simple bsearch.
+  for (int i = 0; i < 10; ++i) {
+    const void *r =
+        bsearch(&i, arr1, array_size(arr1), sizeof(arr1[0]), &cmp_ints);
+    if (!r)
+      printf(" null");
+    else
+      printf(" %d", *(const int *)r);
+  }
+  printf("\n");
+  // CHECK: 0 1 2 3 4 5 6 7 8 null
+
+  // Nested bsearch.
+  for (int i = 0; i < 10; ++i) {
+    const void *r =
+        bsearch(&i, arr2, array_size(arr2), sizeof(arr2[0]), &cmp_pos);
+    if (!r)
+      printf(" null");
+    else
+      printf(" %d", *(const int *)r);
+  }
+  printf("\n");
+  // CHECK: null 1 null 3 4 null 6 7 null 10
+}


        


More information about the llvm-commits mailing list