[compiler-rt] [llvm] [ModuleUtils] Add transformGlobal{C, D}tors (PR #101757)

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 16:49:30 PDT 2024


https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/101757

>From d0cf48c58697f16549ea75470e8be2def1c31499 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Fri, 2 Aug 2024 14:51:52 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 compiler-rt/lib/asan/asan_globals.cpp         | 130 ++++++++++++++----
 .../Instrumentation/AddressSanitizer.cpp      |   5 +
 .../Transforms/Utils/ModuleUtilsTest.cpp      |  85 ++++++++++--
 3 files changed, 179 insertions(+), 41 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index cc5308a24fe89..80030af934cc6 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -47,8 +47,6 @@ struct DynInitGlobal {
   bool initialized = false;
   DynInitGlobal *next = nullptr;
 };
-typedef IntrusiveList<DynInitGlobal> DynInitGlobals;
-static DynInitGlobals dynamic_init_globals SANITIZER_GUARDED_BY(mu_for_globals);
 
 // We want to remember where a certain range of globals was registered.
 struct GlobalRegistrationSite {
@@ -72,6 +70,25 @@ static ListOfGlobals &GlobalsByIndicator(uptr odr_indicator)
   return (*globals_by_indicator)[odr_indicator];
 }
 
+static const char *current_dynamic_init_module_name
+    SANITIZER_GUARDED_BY(mu_for_globals) = nullptr;
+
+using DynInitGlobalsByModule =
+    DenseMap<const char *, IntrusiveList<DynInitGlobal>>;
+
+// TODO: Add a NoDestroy helper, this pattern is very common in sanitizers.
+static DynInitGlobalsByModule &DynInitGlobals()
+    SANITIZER_REQUIRES(mu_for_globals) {
+  static DynInitGlobalsByModule *globals_by_module = nullptr;
+  if (!globals_by_module) {
+    alignas(alignof(DynInitGlobalsByModule)) static char
+        placeholder[sizeof(DynInitGlobalsByModule)];
+    globals_by_module = new (placeholder) DynInitGlobalsByModule();
+  }
+
+  return *globals_by_module;
+}
+
 ALWAYS_INLINE void PoisonShadowForGlobal(const Global *g, u8 value) {
   FastPoisonShadow(g->beg, g->size_with_redzone, value);
 }
@@ -257,8 +274,8 @@ static void RegisterGlobal(const Global *g) SANITIZER_REQUIRES(mu_for_globals) {
   AddGlobalToList(list_of_all_globals, g);
 
   if (g->has_dynamic_init) {
-    dynamic_init_globals.push_back(new (GetGlobalLowLevelAllocator())
-                                       DynInitGlobal{*g, false});
+    DynInitGlobals()[g->module_name].push_back(
+        new (GetGlobalLowLevelAllocator()) DynInitGlobal{*g, false});
   }
 }
 
@@ -284,20 +301,42 @@ static void UnregisterGlobal(const Global *g)
   }
 }
 
-void StopInitOrderChecking() {
-  if (!flags()->check_initialization_order)
-    return;
-  Lock lock(&mu_for_globals);
-  flags()->check_initialization_order = false;
-  for (const DynInitGlobal &dyn_g : dynamic_init_globals) {
+static void UnpoisonDynamicGlobals(IntrusiveList<DynInitGlobal> &dyn_globals,
+                                   bool mark_initialized) {
+  for (auto &dyn_g : dyn_globals) {
     const Global *g = &dyn_g.g;
+    if (dyn_g.initialized)
+      continue;
     // Unpoison the whole global.
     PoisonShadowForGlobal(g, 0);
     // Poison redzones back.
     PoisonRedZones(*g);
+    if (mark_initialized)
+      dyn_g.initialized = true;
   }
 }
 
+static void PoisonDynamicGlobals(
+    const IntrusiveList<DynInitGlobal> &dyn_globals) {
+  for (auto &dyn_g : dyn_globals) {
+    const Global *g = &dyn_g.g;
+    if (dyn_g.initialized)
+      continue;
+    PoisonShadowForGlobal(g, kAsanInitializationOrderMagic);
+  }
+}
+
+void StopInitOrderChecking() {
+  if (!flags()->check_initialization_order)
+    return;
+  Lock lock(&mu_for_globals);
+  flags()->check_initialization_order = false;
+  DynInitGlobals().forEach([&](auto &kv) {
+    UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false);
+    return true;
+  });
+}
+
 static bool IsASCII(unsigned char c) { return /*0x00 <= c &&*/ c <= 0x7F; }
 
 const char *MaybeDemangleGlobalName(const char *name) {
@@ -456,36 +495,73 @@ void __asan_before_dynamic_init(const char *module_name) {
   CHECK(module_name);
   CHECK(AsanInited());
   Lock lock(&mu_for_globals);
+  if (current_dynamic_init_module_name == module_name)
+    return;
   if (flags()->report_globals >= 3)
     Printf("DynInitPoison module: %s\n", module_name);
-  for (DynInitGlobal &dyn_g : dynamic_init_globals) {
-    const Global *g = &dyn_g.g;
-    if (dyn_g.initialized)
-      continue;
-    if (g->module_name != module_name)
-      PoisonShadowForGlobal(g, kAsanInitializationOrderMagic);
-    else if (!strict_init_order)
-      dyn_g.initialized = true;
+
+  if (current_dynamic_init_module_name == nullptr) {
+    // First call, poison all globals from other modules.
+    DynInitGlobals().forEach([&](auto &kv) {
+      if (kv.first != module_name) {
+        PoisonDynamicGlobals(kv.second);
+      } else {
+        UnpoisonDynamicGlobals(kv.second,
+                               /*mark_initialized=*/!strict_init_order);
+      }
+      return true;
+    });
+  } else {
+    // Module changed.
+    PoisonDynamicGlobals(DynInitGlobals()[current_dynamic_init_module_name]);
+    UnpoisonDynamicGlobals(DynInitGlobals()[module_name],
+                           /*mark_initialized=*/!strict_init_order);
   }
+  current_dynamic_init_module_name = module_name;
 }
 
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+static bool allow_after_dynamic_init = false;
+
+static void __attribute__((used)) AfterDynamicInit(void) {
+  if (flags()->report_globals >= 3)
+    Printf("AfterDynamicInit\n");
+  if (allow_after_dynamic_init)
+    return;
+  allow_after_dynamic_init = true;
+  __asan_after_dynamic_init();
+}
+
+// Maybe SANITIZER_CAN_USE_PREINIT_ARRAY is too conservative for `.init_array`.
+__attribute__((section(".init_array"), constructor(10000),
+               used)) static void (*__init)(void) = AfterDynamicInit;
+#endif  // SANITIZER_CAN_USE_PREINIT_ARRAY
+
 // This method runs immediately after dynamic initialization in each TU, when
 // all dynamically initialized globals except for those defined in the current
 // TU are poisoned.  It simply unpoisons all dynamically initialized globals.
 void __asan_after_dynamic_init() {
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+  // Ignore all callbacks until the first one from .init_array, which should
+  // happen after all C++ global constructors.
+  if (!allow_after_dynamic_init)
+    return;
+#endif
+
   if (!flags()->check_initialization_order || !CanPoisonMemory())
     return;
   CHECK(AsanInited());
   Lock lock(&mu_for_globals);
+  if (!current_dynamic_init_module_name)
+    return;
+
   if (flags()->report_globals >= 3)
     Printf("DynInitUnpoison\n");
-  for (const DynInitGlobal &dyn_g : dynamic_init_globals) {
-    const Global *g = &dyn_g.g;
-    if (!dyn_g.initialized) {
-      // Unpoison the whole global.
-      PoisonShadowForGlobal(g, 0);
-      // Poison redzones back.
-      PoisonRedZones(*g);
-    }
-  }
+
+  DynInitGlobals().forEach([&](auto &kv) {
+    UnpoisonDynamicGlobals(kv.second, /*mark_initialized=*/false);
+    return true;
+  });
+
+  current_dynamic_init_module_name = nullptr;
 }
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9fb1df7ab2b79..04bebe39c3a14 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1990,6 +1990,11 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls(
       // Don't instrument CTORs that will run before asan.module_ctor.
       if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple))
         continue;
+      if (Priority->getLimitedValue() == 65535) {
+        CS->setOperand(0,
+                       ConstantInt::getSigned(Priority->getType(),
+                                              Priority->getLimitedValue() - 1));
+      }
       poisonOneInitializer(*F, ModuleName);
     }
   }
diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
index e1bc58fda0e38..0ed7be9620a6f 100644
--- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp
@@ -9,6 +9,7 @@
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/SourceMgr.h"
@@ -16,7 +17,7 @@
 
 using namespace llvm;
 
-static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+static std::unique_ptr<Module> parseIR(LLVMContext &C, StringRef IR) {
   SMDiagnostic Err;
   std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
   if (!Mod)
@@ -24,12 +25,12 @@ static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
   return Mod;
 }
 
-static int getUsedListSize(Module &M, StringRef Name) {
-  auto *UsedList = M.getGlobalVariable(Name);
-  if (!UsedList)
+static int getListSize(Module &M, StringRef Name) {
+  auto *List = M.getGlobalVariable(Name);
+  if (!List)
     return 0;
-  auto *UsedListBaseArrayType = cast<ArrayType>(UsedList->getValueType());
-  return UsedListBaseArrayType->getNumElements();
+  auto *T = cast<ArrayType>(List->getValueType());
+  return T->getNumElements();
 }
 
 TEST(ModuleUtils, AppendToUsedList1) {
@@ -41,13 +42,13 @@ TEST(ModuleUtils, AppendToUsedList1) {
   for (auto &G : M->globals()) {
     Globals.push_back(&G);
   }
-  EXPECT_EQ(0, getUsedListSize(*M, "llvm.compiler.used"));
+  EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used"));
   appendToCompilerUsed(*M, Globals);
-  EXPECT_EQ(1, getUsedListSize(*M, "llvm.compiler.used"));
+  EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used"));
 
-  EXPECT_EQ(0, getUsedListSize(*M, "llvm.used"));
+  EXPECT_EQ(0, getListSize(*M, "llvm.used"));
   appendToUsed(*M, Globals);
-  EXPECT_EQ(1, getUsedListSize(*M, "llvm.used"));
+  EXPECT_EQ(1, getListSize(*M, "llvm.used"));
 }
 
 TEST(ModuleUtils, AppendToUsedList2) {
@@ -59,11 +60,67 @@ TEST(ModuleUtils, AppendToUsedList2) {
   for (auto &G : M->globals()) {
     Globals.push_back(&G);
   }
-  EXPECT_EQ(0, getUsedListSize(*M, "llvm.compiler.used"));
+  EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used"));
   appendToCompilerUsed(*M, Globals);
-  EXPECT_EQ(1, getUsedListSize(*M, "llvm.compiler.used"));
+  EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used"));
 
-  EXPECT_EQ(0, getUsedListSize(*M, "llvm.used"));
+  EXPECT_EQ(0, getListSize(*M, "llvm.used"));
   appendToUsed(*M, Globals);
-  EXPECT_EQ(1, getUsedListSize(*M, "llvm.used"));
+  EXPECT_EQ(1, getListSize(*M, "llvm.used"));
+}
+
+using AppendFnType = decltype(&appendToGlobalCtors);
+using ParamType = std::tuple<StringRef, AppendFnType>;
+class ModuleUtilsTest : public testing::TestWithParam<ParamType> {
+public:
+  StringRef arrayName() const { return std::get<0>(GetParam()); }
+  AppendFnType appendFn() const { return std::get<AppendFnType>(GetParam()); }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    ModuleUtilsTestCtors, ModuleUtilsTest,
+    ::testing::Values(ParamType{"llvm.global_ctors", &appendToGlobalCtors},
+                      ParamType{"llvm.global_dtors", &appendToGlobalDtors}));
+
+TEST_P(ModuleUtilsTest, AppendToMissingArray) {
+  LLVMContext C;
+
+  std::unique_ptr<Module> M = parseIR(C, "");
+
+  EXPECT_EQ(0, getListSize(*M, arrayName()));
+  Function *F = cast<Function>(
+      M->getOrInsertFunction("ctor", Type::getVoidTy(C)).getCallee());
+  appendFn()(*M, F, 11, F);
+  ASSERT_EQ(1, getListSize(*M, arrayName()));
+
+  ConstantArray *CA = dyn_cast<ConstantArray>(
+      M->getGlobalVariable(arrayName())->getInitializer());
+  ASSERT_NE(nullptr, CA);
+  ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(0));
+  ASSERT_NE(nullptr, CS);
+  ConstantInt *Pri = dyn_cast<ConstantInt>(CS->getOperand(0));
+  ASSERT_NE(nullptr, Pri);
+  EXPECT_EQ(11u, Pri->getLimitedValue());
+  EXPECT_EQ(F, dyn_cast<Function>(CS->getOperand(1)));
+  EXPECT_EQ(F, CS->getOperand(2));
+}
+
+TEST_P(ModuleUtilsTest, AppendToArray) {
+  LLVMContext C;
+
+  std::unique_ptr<Module> M =
+      parseIR(C, (R"(@)" + arrayName() +
+                  R"( = appending global [2 x { i32, ptr, ptr }] [
+            { i32, ptr, ptr } { i32 65535, ptr  null, ptr null },
+            { i32, ptr, ptr } { i32 0, ptr  null, ptr null }]
+      )")
+                     .str());
+
+  EXPECT_EQ(2, getListSize(*M, arrayName()));
+  appendFn()(
+      *M,
+      cast<Function>(
+          M->getOrInsertFunction("ctor", Type::getVoidTy(C)).getCallee()),
+      11, nullptr);
+  EXPECT_EQ(3, getListSize(*M, arrayName()));
 }



More information about the llvm-commits mailing list