[compiler-rt] [asan] Speed up ASan ODR indicator-based checking (PR #100923)

Artem Pianykh via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 15:27:34 PDT 2024


https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/100923

>From 711bbf3422a4694ed6fc665882a4c9889635cae0 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 28 Jul 2024 02:23:43 -0700
Subject: [PATCH 1/5] [asan] Speed up ASan ODR indicator-based checking

When ASan checks for ODR violation on a global it loops over a linked list of all globals to find
those with the matching value of an indicator. For larger binaries with a ton of globals this gets
extremely expensive: O(N^2) iterations + potentially poor memory locality from the linked list.

This patch adds an indicator indexed (multi-)map of globals to speed up the search. With a bit of
extra memory this massively speeds up indicator based ODR checks.

Poisoning based ODR checking remains on the slow path.
---
 compiler-rt/lib/asan/asan_globals.cpp | 43 +++++++++++++++++++++------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index d413b1ebc9fc0..26518c8ce4b98 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -21,6 +21,7 @@
 #include "asan_suppressions.h"
 #include "asan_thread.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_dense_map.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
@@ -35,8 +36,11 @@ struct ListOfGlobals {
   ListOfGlobals *next;
 };
 
+typedef DenseMap<uptr, ListOfGlobals *> MapOfGlobals;
+
 static Mutex mu_for_globals;
 static ListOfGlobals *list_of_all_globals;
+static MapOfGlobals map_of_globals_by_indicator;
 
 static const int kDynamicInitGlobalsInitialCapacity = 512;
 struct DynInitGlobal {
@@ -73,6 +77,18 @@ ALWAYS_INLINE void PoisonRedZones(const Global &g) {
 
 const uptr kMinimalDistanceFromAnotherGlobal = 64;
 
+static void AddGlobalToList(ListOfGlobals *&list, const Global *g) {
+  ListOfGlobals *l = new (GetGlobalLowLevelAllocator()) ListOfGlobals;
+  l->g = g;
+  l->next = list;
+  list = l;
+}
+
+static void AddGlobalToMap(MapOfGlobals &map, const Global *g) {
+  ListOfGlobals *&in_map = map[g->odr_indicator];
+  AddGlobalToList(in_map, g);
+}
+
 static bool IsAddressNearGlobal(uptr addr, const __asan_global &g) {
   if (addr <= g.beg - kMinimalDistanceFromAnotherGlobal) return false;
   if (addr >= g.beg + g.size_with_redzone) return false;
@@ -147,14 +163,20 @@ static void CheckODRViolationViaIndicator(const Global *g) {
     *odr_indicator = REGISTERED;
     return;
   }
+  // Fetch globals with the same ODR indicator
+  auto *relevant_globals_lookup =
+      map_of_globals_by_indicator.find(g->odr_indicator);
+  if (!relevant_globals_lookup) {
+    return;
+  }
+  ListOfGlobals *relevant_globals = relevant_globals_lookup->second;
   // If *odr_indicator is DEFINED, some module have already registered
   // externally visible symbol with the same name. This is an ODR violation.
-  for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
-    if (g->odr_indicator == l->g->odr_indicator &&
-        (flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
+  for (ListOfGlobals *l = relevant_globals; l; l = l->next) {
+    if ((flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
         !IsODRViolationSuppressed(g->name))
-      ReportODRViolation(g, FindRegistrationSite(g),
-                         l->g, FindRegistrationSite(l->g));
+      ReportODRViolation(g, FindRegistrationSite(g), l->g,
+                         FindRegistrationSite(l->g));
   }
 }
 
@@ -225,10 +247,13 @@ static void RegisterGlobal(const Global *g) {
   }
   if (CanPoisonMemory())
     PoisonRedZones(*g);
-  ListOfGlobals *l = new (GetGlobalLowLevelAllocator()) ListOfGlobals;
-  l->g = g;
-  l->next = list_of_all_globals;
-  list_of_all_globals = l;
+
+  AddGlobalToList(list_of_all_globals, g);
+
+  if (UseODRIndicator(g) && g->odr_indicator != UINTPTR_MAX) {
+    AddGlobalToMap(map_of_globals_by_indicator, g);
+  }
+
   if (g->has_dynamic_init) {
     if (!dynamic_init_globals) {
       dynamic_init_globals = new (GetGlobalLowLevelAllocator()) VectorOfGlobals;

>From 87979661dc99683fc2724df5e5018be7f9509f32 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Tue, 30 Jul 2024 02:57:45 -0700
Subject: [PATCH 2/5] amend! [asan] Speed up ASan ODR indicator-based checking

[asan] Speed up ASan ODR indicator-based checking

When ASan checks for a potential ODR violation on a global it loops over
a linked list of all globals to find those with the matching value of an
indicator. With the default setting 'detect_odr_violation=1', ASan
doesn't report violations on same-size globals but it still has to
traverse the list.

For larger binaries with a ton of shared libs and globals (and a
non-trivial volume of same-sized duplicates) this gets extremely
expensive.

This patch adds an indicator indexed (multi-)map of globals to speed up
the search.
---
 compiler-rt/lib/asan/asan_globals.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index 26518c8ce4b98..cc887bdbec347 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -163,12 +163,13 @@ static void CheckODRViolationViaIndicator(const Global *g) {
     *odr_indicator = REGISTERED;
     return;
   }
-  // Fetch globals with the same ODR indicator
+
+  // Fetch globals with the same ODR indicator.
   auto *relevant_globals_lookup =
       map_of_globals_by_indicator.find(g->odr_indicator);
-  if (!relevant_globals_lookup) {
+  if (!relevant_globals_lookup)
     return;
-  }
+
   ListOfGlobals *relevant_globals = relevant_globals_lookup->second;
   // If *odr_indicator is DEFINED, some module have already registered
   // externally visible symbol with the same name. This is an ODR violation.

>From 99b2444c7742b2c3a955660c75a95e063c03fc74 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Tue, 30 Jul 2024 04:30:38 -0700
Subject: [PATCH 3/5] fixup! amend! [asan] Speed up ASan ODR indicator-based
 checking

---
 compiler-rt/lib/asan/asan_globals.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index cc887bdbec347..53923d7466f99 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -251,9 +251,8 @@ static void RegisterGlobal(const Global *g) {
 
   AddGlobalToList(list_of_all_globals, g);
 
-  if (UseODRIndicator(g) && g->odr_indicator != UINTPTR_MAX) {
+  if (UseODRIndicator(g) && g->odr_indicator != UINTPTR_MAX)
     AddGlobalToMap(map_of_globals_by_indicator, g);
-  }
 
   if (g->has_dynamic_init) {
     if (!dynamic_init_globals) {

>From b4314e18aee9f58c1b26d99e28689fe17eeba880 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Wed, 31 Jul 2024 12:40:18 -0700
Subject: [PATCH 4/5] fixup! fixup! amend! [asan] Speed up ASan ODR
 indicator-based checking

---
 compiler-rt/lib/asan/asan_globals.cpp | 40 ++++++++++-----------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index 53923d7466f99..3f3a47fd25068 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -84,11 +84,6 @@ static void AddGlobalToList(ListOfGlobals *&list, const Global *g) {
   list = l;
 }
 
-static void AddGlobalToMap(MapOfGlobals &map, const Global *g) {
-  ListOfGlobals *&in_map = map[g->odr_indicator];
-  AddGlobalToList(in_map, g);
-}
-
 static bool IsAddressNearGlobal(uptr addr, const __asan_global &g) {
   if (addr <= g.beg - kMinimalDistanceFromAnotherGlobal) return false;
   if (addr >= g.beg + g.size_with_redzone) return false;
@@ -158,27 +153,25 @@ static void CheckODRViolationViaIndicator(const Global *g) {
   // Instrumentation requests to skip ODR check.
   if (g->odr_indicator == UINTPTR_MAX)
     return;
+
+  ListOfGlobals *&relevant_globals =
+      map_of_globals_by_indicator[g->odr_indicator];
+
   u8 *odr_indicator = reinterpret_cast<u8 *>(g->odr_indicator);
-  if (*odr_indicator == UNREGISTERED) {
+  if (*odr_indicator == REGISTERED) {
+    // If *odr_indicator is REGISTERED, some module have already registered
+    // externally visible symbol with the same name. This is an ODR violation.
+    for (ListOfGlobals *l = relevant_globals; l; l = l->next) {
+      if ((flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
+          !IsODRViolationSuppressed(g->name))
+        ReportODRViolation(g, FindRegistrationSite(g), l->g,
+                           FindRegistrationSite(l->g));
+    }
+  } else {  // UNREGISTERED
     *odr_indicator = REGISTERED;
-    return;
   }
 
-  // Fetch globals with the same ODR indicator.
-  auto *relevant_globals_lookup =
-      map_of_globals_by_indicator.find(g->odr_indicator);
-  if (!relevant_globals_lookup)
-    return;
-
-  ListOfGlobals *relevant_globals = relevant_globals_lookup->second;
-  // If *odr_indicator is DEFINED, some module have already registered
-  // externally visible symbol with the same name. This is an ODR violation.
-  for (ListOfGlobals *l = relevant_globals; l; l = l->next) {
-    if ((flags()->detect_odr_violation >= 2 || g->size != l->g->size) &&
-        !IsODRViolationSuppressed(g->name))
-      ReportODRViolation(g, FindRegistrationSite(g), l->g,
-                         FindRegistrationSite(l->g));
-  }
+  AddGlobalToList(relevant_globals, g);
 }
 
 // Check ODR violation for given global G by checking if it's already poisoned.
@@ -251,9 +244,6 @@ static void RegisterGlobal(const Global *g) {
 
   AddGlobalToList(list_of_all_globals, g);
 
-  if (UseODRIndicator(g) && g->odr_indicator != UINTPTR_MAX)
-    AddGlobalToMap(map_of_globals_by_indicator, g);
-
   if (g->has_dynamic_init) {
     if (!dynamic_init_globals) {
       dynamic_init_globals = new (GetGlobalLowLevelAllocator()) VectorOfGlobals;

>From 01847d1dd72fb078d26f38973391f37d2815ec3c Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Wed, 31 Jul 2024 15:18:26 -0700
Subject: [PATCH 5/5] [asan] Add a test that captures the ODR checker behavior
 when globals get unregistered

The test demonstrates a FN due to indicator value being shared among all globals with the same
indicator address.

Moving dlopen/dlclose around and/or adding more shared libs into the mix can show more edge cases
but this is out of the scope of this commit.
---
 .../Linux/odr_indicator_unregister.cpp        | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 compiler-rt/test/asan/TestCases/Linux/odr_indicator_unregister.cpp

diff --git a/compiler-rt/test/asan/TestCases/Linux/odr_indicator_unregister.cpp b/compiler-rt/test/asan/TestCases/Linux/odr_indicator_unregister.cpp
new file mode 100644
index 0000000000000..5366c7976797e
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/Linux/odr_indicator_unregister.cpp
@@ -0,0 +1,73 @@
+// Test that captures the current behavior of indicator-based ASan ODR checker
+// when globals get unregistered.
+//
+// RUN: %clangxx_asan -g -O0 -DSHARED_LIB -DSIZE=1 %s -fPIC -shared -o %t-so-1.so
+// RUN: %clangxx_asan -g -O0 -DSHARED_LIB -DSIZE=2 %s -fPIC -shared -o %t-so-2.so
+// RUN: %clangxx_asan -g -O0 %s %libdl -Wl,--export-dynamic -o %t
+// RUN: %env_asan_opts=report_globals=2:detect_odr_violation=1 %run %t 2>&1 | FileCheck %s
+
+#include <cstdlib>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <string>
+
+#ifdef SHARED_LIB
+namespace foo {
+char G[SIZE];
+}
+#else // SHARED_LIB
+void *dlopen_or_die(std::string &path) {
+  void *handle = dlopen(path.c_str(), RTLD_NOW);
+  if (handle) {
+    printf("Successfully called dlopen() on %s\n", path.c_str());
+  } else {
+    printf("Error in dlopen(): %s\n", dlerror());
+    std::exit(1);
+  }
+
+  return handle;
+}
+
+void dlclose_or_die(void *handle, std::string &path) {
+  if (!dlclose(handle)) {
+    printf("Successfully called dlclose() on %s\n", path.c_str());
+  } else {
+    printf("Error in dlclose(): %s\n", dlerror());
+    std::exit(1);
+  }
+}
+
+namespace foo {
+char G[1];
+}
+
+// CHECK: Added Global[[MAIN_G:[^\s]+]] size=1/32 name=foo::G {{.*}}
+int main(int argc, char *argv[]) {
+  std::string base_path = std::string(argv[0]);
+
+  std::string path1 = base_path + "-so-1.so";
+  // dlopen() brings another foo::G but it matches MAIN_G in size, hence the
+  // violation is not reported.
+  //
+  // CHECK: Added Global[[SO1_G:[^\s]+]] size=1/32 name=foo::G {{.*}}
+  // CHECK-NOT: ERROR: AddressSanitizer: odr-violation
+  void *handle1 = dlopen_or_die(path1);
+  // CHECK: Removed Global[[SO1_G]] size=1/32 name=foo::G {{.*}}
+  dlclose_or_die(handle1, path1);
+
+  // At this point the indicator for foo::G is switched to UNREGISTERED for
+  // **both** MAIN_G and SO1_G because the indicator value is shared.
+
+  std::string path2 = base_path + "-so-2.so";
+  // CHECK: Added Global[[SO2_G:[^\s]+]] size=2/32 name=foo::G {{.*}}
+  //
+  // Although, this global is different in size from MAIN_G the violation is not
+  // reported (see line 58).
+  //
+  // CHECK-NOT: ERROR: AddressSanitizer: odr-violation
+  void *handle2 = dlopen_or_die(path2);
+  // CHECK: Removed Global[[MAIN_G]] size=1/32 name=foo::G {{.*}}
+  // CHECK: Removed Global[[SO2_G]] size=2/32 name=foo::G {{.*}}
+}
+
+#endif // SHARED_LIB



More information about the llvm-commits mailing list