[compiler-rt] [asan] Optimize initialization order checking (PR #101837)

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 7 17:01:15 PDT 2024


https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/101837

>From f26dfea1bf5da060aa895fa107575a228d5b0702 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Sat, 3 Aug 2024 10:35:20 -0700
Subject: [PATCH 1/7] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 compiler-rt/lib/asan/asan_globals.cpp | 108 +++++++++++++++++++-------
 1 file changed, 80 insertions(+), 28 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index cc5308a24fe89..293e771e1dc06 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -47,8 +47,6 @@ struct DynInitGlobal {
   bool initialized = false;
   DynInitGlobal *next = nullptr;
 };
-typedef IntrusiveList<DynInitGlobal> DynInitGlobals;
-static DynInitGlobals dynamic_init_globals SANITIZER_GUARDED_BY(mu_for_globals);
 
 // We want to remember where a certain range of globals was registered.
 struct GlobalRegistrationSite {
@@ -72,6 +70,25 @@ static ListOfGlobals &GlobalsByIndicator(uptr odr_indicator)
   return (*globals_by_indicator)[odr_indicator];
 }
 
+static const char *current_dynamic_init_module_name
+    SANITIZER_GUARDED_BY(mu_for_globals) = nullptr;
+
+using DynInitGlobalsByModule =
+    DenseMap<const char *, IntrusiveList<DynInitGlobal>>;
+
+// TODO: Add a NoDestroy helper, this patter is very common in sanitizers.
+static DynInitGlobalsByModule &DynInitGlobals()
+    SANITIZER_REQUIRES(mu_for_globals) {
+  static DynInitGlobalsByModule *globals_by_module = nullptr;
+  if (!globals_by_module) {
+    alignas(alignof(DynInitGlobalsByModule)) static char
+        placeholder[sizeof(DynInitGlobalsByModule)];
+    globals_by_module = new (placeholder) DynInitGlobalsByModule();
+  }
+
+  return *globals_by_module;
+}
+
 ALWAYS_INLINE void PoisonShadowForGlobal(const Global *g, u8 value) {
   FastPoisonShadow(g->beg, g->size_with_redzone, value);
 }
@@ -94,6 +111,31 @@ static void AddGlobalToList(ListOfGlobals &list, const Global *g) {
   list.push_front(new (GetGlobalLowLevelAllocator()) GlobalListNode{g});
 }
 
+static void UnpoisonDynamicGlobals(IntrusiveList<DynInitGlobal> &dyn_globals,
+                                   bool mark_initialized) {
+  for (auto &dyn_g : dyn_globals) {
+    const Global *g = &dyn_g.g;
+    if (dyn_g.initialized)
+      continue;
+    // Unpoison the whole global.
+    PoisonShadowForGlobal(g, 0);
+    // Poison redzones back.
+    PoisonRedZones(*g);
+    if (mark_initialized)
+      dyn_g.initialized = true;
+  }
+}
+
+static void PoisonDynamicGlobals(
+    const IntrusiveList<DynInitGlobal> &dyn_globals) {
+  for (auto &dyn_g : dyn_globals) {
+    const Global *g = &dyn_g.g;
+    if (dyn_g.initialized)
+      continue;
+    PoisonShadowForGlobal(g, kAsanInitializationOrderMagic);
+  }
+}
+
 static bool IsAddressNearGlobal(uptr addr, const __asan_global &g) {
   if (addr <= g.beg - kMinimalDistanceFromAnotherGlobal) return false;
   if (addr >= g.beg + g.size_with_redzone) return false;
@@ -257,8 +299,8 @@ static void RegisterGlobal(const Global *g) SANITIZER_REQUIRES(mu_for_globals) {
   AddGlobalToList(list_of_all_globals, g);
 
   if (g->has_dynamic_init) {
-    dynamic_init_globals.push_back(new (GetGlobalLowLevelAllocator())
-                                       DynInitGlobal{*g, false});
+    DynInitGlobals()[g->module_name].push_back(
+        new (GetGlobalLowLevelAllocator()) DynInitGlobal{*g, false});
   }
 }
 
@@ -289,13 +331,10 @@ void StopInitOrderChecking() {
     return;
   Lock lock(&mu_for_globals);
   flags()->check_initialization_order = false;
-  for (const DynInitGlobal &dyn_g : dynamic_init_globals) {
-    const Global *g = &dyn_g.g;
-    // Unpoison the whole global.
-    PoisonShadowForGlobal(g, 0);
-    // Poison redzones back.
-    PoisonRedZones(*g);
-  }
+  DynInitGlobals().forEach([&](auto &kv) {
+    UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false);
+    return true;
+  });
 }
 
 static bool IsASCII(unsigned char c) { return /*0x00 <= c &&*/ c <= 0x7F; }
@@ -456,17 +495,29 @@ void __asan_before_dynamic_init(const char *module_name) {
   CHECK(module_name);
   CHECK(AsanInited());
   Lock lock(&mu_for_globals);
+  if (current_dynamic_init_module_name == module_name)
+    return;
   if (flags()->report_globals >= 3)
     Printf("DynInitPoison module: %s\n", module_name);
-  for (DynInitGlobal &dyn_g : dynamic_init_globals) {
-    const Global *g = &dyn_g.g;
-    if (dyn_g.initialized)
-      continue;
-    if (g->module_name != module_name)
-      PoisonShadowForGlobal(g, kAsanInitializationOrderMagic);
-    else if (!strict_init_order)
-      dyn_g.initialized = true;
+
+  if (current_dynamic_init_module_name == nullptr) {
+    // First call, poison all globals from other modules.
+    DynInitGlobals().forEach([&](auto &kv) {
+      if (kv.first != module_name) {
+        PoisonDynamicGlobals(kv.second);
+      } else {
+        UnpoisonDynamicGlobals(kv.second,
+                               /*mark_initialized=*/!strict_init_order);
+      }
+      return true;
+    });
+  } else {
+    // Module changed.
+    PoisonDynamicGlobals(DynInitGlobals()[current_dynamic_init_module_name]);
+    UnpoisonDynamicGlobals(DynInitGlobals()[module_name],
+                           /*mark_initialized=*/!strict_init_order);
   }
+  current_dynamic_init_module_name = module_name;
 }
 
 // This method runs immediately after dynamic initialization in each TU, when
@@ -477,15 +528,16 @@ void __asan_after_dynamic_init() {
     return;
   CHECK(AsanInited());
   Lock lock(&mu_for_globals);
+  if (!current_dynamic_init_module_name)
+    return;
+
   if (flags()->report_globals >= 3)
     Printf("DynInitUnpoison\n");
-  for (const DynInitGlobal &dyn_g : dynamic_init_globals) {
-    const Global *g = &dyn_g.g;
-    if (!dyn_g.initialized) {
-      // Unpoison the whole global.
-      PoisonShadowForGlobal(g, 0);
-      // Poison redzones back.
-      PoisonRedZones(*g);
-    }
-  }
+
+  DynInitGlobals().forEach([&](auto &kv) {
+    UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false);
+    return true;
+  });
+
+  current_dynamic_init_module_name = nullptr;
 }

>From 75060ddc588f64c6f262f73ae84f0d993111ae34 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Sat, 3 Aug 2024 13:01:36 -0700
Subject: [PATCH 2/7] reword comment

Created using spr 1.3.4
---
 compiler-rt/test/asan/TestCases/initialization-nobug.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/initialization-nobug.cpp b/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
index d5969b547ecb0..62d063b3a959e 100644
--- a/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
@@ -46,7 +46,8 @@ int main() { return 0; }
 // CHECK: DynInitPoison
 // CHECK: DynInitPoison
 
-// In general case we only need the last of DynInit{Poison,Unpoison} is always
-// Unpoison.
+// In general case entire set of DynInitPoison must be followed by at lest one
+// DynInitUnpoison. In some cases we can limit number of DynInitUnpoison, see
+// initialization-nobug-lld.cpp.
 
 // CHECK: DynInitUnpoison

>From 1ffa55d00c5756bb19404c239096c10333b43bec Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Sat, 3 Aug 2024 13:02:15 -0700
Subject: [PATCH 3/7] reword

Created using spr 1.3.4
---
 compiler-rt/test/asan/TestCases/initialization-nobug.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/initialization-nobug.cpp b/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
index 62d063b3a959e..f66d501124bc4 100644
--- a/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-nobug.cpp
@@ -47,7 +47,7 @@ int main() { return 0; }
 // CHECK: DynInitPoison
 
 // In general case entire set of DynInitPoison must be followed by at lest one
-// DynInitUnpoison. In some cases we can limit number of DynInitUnpoison, see
-// initialization-nobug-lld.cpp.
+// DynInitUnpoison. In some cases we can limit the number of DynInitUnpoison,
+// see initialization-nobug-lld.cpp.
 
 // CHECK: DynInitUnpoison

>From c2ca3ba8984296f17aa61e65d5a0587db78bd832 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Wed, 7 Aug 2024 16:46:44 -0700
Subject: [PATCH 4/7] move

Created using spr 1.3.4
---
 .../asan/TestCases/{ => Linux}/initialization-nobug-lld.cpp     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename compiler-rt/test/asan/TestCases/{ => Linux}/initialization-nobug-lld.cpp (91%)

diff --git a/compiler-rt/test/asan/TestCases/initialization-nobug-lld.cpp b/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
similarity index 91%
rename from compiler-rt/test/asan/TestCases/initialization-nobug-lld.cpp
rename to compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
index ece3118de4ad9..80b502cd5538a 100644
--- a/compiler-rt/test/asan/TestCases/initialization-nobug-lld.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O3 %p/initialization-nobug.cpp %p/Helpers/initialization-nobug-extra.cpp -fuse-ld=lld -o %t && %env_asan_opts=check_initialization_order=true:report_globals=3 %run %t 2>&1 | FileCheck %s --implicit-check-not "DynInit"
+// RUN: %clangxx_asan -O3 %p/initialization-nobug.cpp %S/Helpers/initialization-nobug-extra.cpp -fuse-ld=lld -o %t && %env_asan_opts=check_initialization_order=true:report_globals=3 %run %t 2>&1 | FileCheck %s --implicit-check-not "DynInit"
 
 // Same as initialization-nobug.cpp, but with lld we expect just one
 // `DynInitUnpoison` executed after `AfterDynamicInit` at the end.

>From 317ad9c5bf55942aece582a884b629737cda7f08 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Wed, 7 Aug 2024 16:52:32 -0700
Subject: [PATCH 5/7] %S

Created using spr 1.3.4
---
 .../test/asan/TestCases/Linux/initialization-nobug-lld.cpp      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp b/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
index 80b502cd5538a..5595f9edce59b 100644
--- a/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/initialization-nobug-lld.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O3 %p/initialization-nobug.cpp %S/Helpers/initialization-nobug-extra.cpp -fuse-ld=lld -o %t && %env_asan_opts=check_initialization_order=true:report_globals=3 %run %t 2>&1 | FileCheck %s --implicit-check-not "DynInit"
+// RUN: %clangxx_asan -O3 %S/../initialization-nobug.cpp %S/../Helpers/initialization-nobug-extra.cpp -fuse-ld=lld -o %t && %env_asan_opts=check_initialization_order=true:report_globals=3 %run %t 2>&1 | FileCheck %s --implicit-check-not "DynInit"
 
 // Same as initialization-nobug.cpp, but with lld we expect just one
 // `DynInitUnpoison` executed after `AfterDynamicInit` at the end.

>From f70a22969ed233814a1398c3bfc86025ec06e9f6 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Wed, 7 Aug 2024 16:56:35 -0700
Subject: [PATCH 6/7] comment

Created using spr 1.3.4
---
 compiler-rt/lib/asan/asan_globals.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index 284fc8d2650f9..740b2a898f5b4 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -551,9 +551,7 @@ static void UnpoisonBeforeMain(void) {
 __attribute__((section(".init_array.65537"), used)) static void (
     *asan_after_init_array)(void) = UnpoisonBeforeMain;
 #else
-// Allow all `__asan_after_dynamic_init` if `AfterDynamicInit` is not set.
-// Compiler still generates `__asan_{before,after}_dynamic_init`in pairs, and
-// it's guaranteed that `__asan_after_dynamic_init` will be the last.
+// Incremental poisoning is disabled, unpoison globals immediately.
 static constexpr bool allow_after_dynamic_init = true;
 #endif  // SANITIZER_CAN_USE_PREINIT_ARRAY
 

>From 71f0fdc64b20c82ab3714e9bf2357a249126699d Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Wed, 7 Aug 2024 17:01:03 -0700
Subject: [PATCH 7/7] comment

Created using spr 1.3.4
---
 compiler-rt/lib/asan/asan_globals.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index 740b2a898f5b4..c83b782cb85f8 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -521,15 +521,21 @@ void __asan_before_dynamic_init(const char *module_name) {
 }
 
 // Maybe SANITIZER_CAN_USE_PREINIT_ARRAY is to conservative for `.init_array`,
-// however we should not make mistake here. If `AfterDynamicInit` was not
+// however we should not make mistake here. If `UnpoisonBeforeMain` was not
 // executed at all we will have false reports on globals.
 #if SANITIZER_CAN_USE_PREINIT_ARRAY
-// This is optimization. We will ignore all `__asan_after_dynamic_init`, but the
-// last `__asan_after_dynamic_init`. We don't need number of
-// `__asan_{before,after}_dynamic_init` matches, but we need that the last call
-// was to `__asan_after_dynamic_init`, as it will unpoison all global preparing
-// program for `main` execution. To run `__asan_after_dynamic_init` later, we
-// will register in `.init_array`.
+// This optimization aims to reduce the overhead of `__asan_after_dynamic_init`
+// calls by leveraging incremental unpoisoning/poisoning in
+// `__asan_before_dynamic_init`. We expect most `__asan_after_dynamic_init
+// calls` to be no-ops. However, to ensure all globals are unpoisoned before the
+// `main`, we force `UnpoisonBeforeMain` to fully execute
+// `__asan_after_dynamic_init`.
+
+// With lld, `UnpoisonBeforeMain` runs after standard `.init_array`, making it
+// the final `__asan_after_dynamic_init` call for the static runtime. In
+// contrast, GNU ld executes it earlier, causing subsequent
+// `__asan_after_dynamic_init` calls to perform full unpoisoning, losing the
+// optimization.
 bool allow_after_dynamic_init SANITIZER_GUARDED_BY(mu_for_globals) = false;
 
 static void UnpoisonBeforeMain(void) {
@@ -540,14 +546,10 @@ static void UnpoisonBeforeMain(void) {
     allow_after_dynamic_init = true;
   }
   if (flags()->report_globals >= 3)
-    Printf("AfterDynamicInit\n");
+    Printf("UnpoisonBeforeMain\n");
   __asan_after_dynamic_init();
 }
 
-// 65537 will make it run after constructors with default priority, but it
-// requires ld.lld. With ld.bfd it can be called to early, and fail the
-// optimization. However, correctness should not be affected, as after the first
-// call all subsequent `__asan_after_dynamic_init` will be allowed.
 __attribute__((section(".init_array.65537"), used)) static void (
     *asan_after_init_array)(void) = UnpoisonBeforeMain;
 #else



More information about the llvm-commits mailing list