[llvm] [asan] Ensure __asan_register_elf_globals is called in COMDAT asan.module_ctor (PR #67745)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 10:36:26 PDT 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/67745

>From 8beefce8fd7a3d3a4b733ee0fe0debe5ec72c51a Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 28 Sep 2023 12:04:19 -0700
Subject: [PATCH 1/2] [asan] Ensure __asan_register_elf_globals is called in
 COMDAT asan.module_ctor

On ELF platforms, when there is no global variable, COMDAT asan.module_ctor is
created with no `__asan_register_elf_globals` calls. If this COMDAT is the
prevailing copy selected by the linker, the linkage unit will have no
`__asan_register_elf_globals` call: the redzone will not be poisoned and ODR
violation checker will not work (#67677).

This behavior is benign for -fno-sanitize-address-globals-dead-stripping because
asan.module_ctor functions that call `__asan_register_globals`
(`InstrumentGlobalsWithMetadataArray`) do not use COMDAT.

To fix #67677:

* Use COMDAT for -fsanitize-address-globals-dead-stripping on ELF platforms.
* Call `__asan_register_elf_globals` even if there is no global variable.

Alternatively, when there is no global variable, asan.module_ctor is not COMDAT
and does not call `__asan_register_elf_globals`. However, the asan.module_ctor
function cannot be eliminated by the linker.

Tested the following script. Only ELF -fsanitize-address-globals-dead-stripping has changed behaviors.
```
echo > a.cc  # no global variable, empty uniqueModuleId
echo 'void f() {}' > b.cc  # with global variable, with uniqueModuleId
echo 'int g;' > c.cc  # with global variable
for t in x86_64-linux-gnu arm64-apple-macosx x86_64-windows-msvc; do
  for gc in -f{,no-}sanitize-address-globals-dead-stripping; do
    for f in a.cc b.cc c.cc; do
      echo /tmp/Rel/bin/clang -S --target=$t -fsanitize=address $gc $f -o -
      /tmp/Rel/bin/clang -S --target=$t -fsanitize=address $gc $f -o - | sed -n '/asan.module_ctor/,/ret/p'
    done
  done
done
```
---
 .../Instrumentation/AddressSanitizer.cpp      | 48 ++++++++++---------
 .../Instrumentation/AddressSanitizer/basic.ll |  4 +-
 .../AddressSanitizer/global_metadata_array.ll | 19 ++++++--
 3 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bde5fba20f3b7a6..758997146d3be95 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2237,7 +2237,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF(
 
   // We also need to unregister globals at the end, e.g., when a shared library
   // gets closed.
-  if (DestructorKind != AsanDtorKind::None) {
+  if (DestructorKind != AsanDtorKind::None && !MetadataGlobals.empty()) {
     IRBuilder<> IrbDtor(CreateAsanModuleDtor(M));
     IrbDtor.CreateCall(AsanUnregisterElfGlobals,
                        {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
@@ -2345,8 +2345,6 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
 // asan constructor is comdat-compatible.
 bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
                                                bool *CtorComdat) {
-  *CtorComdat = false;
-
   // Build set of globals that are aliased by some GA, where
   // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable.
   SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions;
@@ -2364,11 +2362,6 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
 
   size_t n = GlobalsToChange.size();
-  if (n == 0) {
-    *CtorComdat = true;
-    return false;
-  }
-
   auto &DL = M.getDataLayout();
 
   // A global is described by a structure
@@ -2391,8 +2384,11 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
 
   // We shouldn't merge same module names, as this string serves as unique
   // module ID in runtime.
-  GlobalVariable *ModuleName = createPrivateGlobalForString(
-      M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
+  GlobalVariable *ModuleName =
+      n != 0
+          ? createPrivateGlobalForString(M, M.getModuleIdentifier(),
+                                         /*AllowMerging*/ false, kAsanGenPrefix)
+          : nullptr;
 
   for (size_t i = 0; i < n; i++) {
     GlobalVariable *G = GlobalsToChange[i];
@@ -2517,19 +2513,27 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
   appendToCompilerUsed(M, ArrayRef<GlobalValue *>(GlobalsToAddToUsedList));
 
-  std::string ELFUniqueModuleId =
-      (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M)
-                                                        : "";
-
-  if (!ELFUniqueModuleId.empty()) {
-    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+  if (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) {
+    // Use COMDAT and register globals even if n == 0 to ensure that (a) the
+    // linkage unit will only have one module constructor, and (b) the register
+    // function will be called. The module destructor is not created when n ==
+    // 0.
     *CtorComdat = true;
-  } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
-    InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
-  } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
-    InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers,
+                         getUniqueModuleId(&M));
+  } else if (n == 0) {
+    // When UseGlobalsGC is false, COMDAT can still be used if n == 0, because
+    // all compile units will have identical module constructor/destructor.
+    *CtorComdat = TargetTriple.isOSBinFormatELF();
   } else {
-    InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
+    *CtorComdat = false;
+    if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
+      InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
+    } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
+      InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+    } else {
+      InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
+    }
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
@@ -2537,7 +2541,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
     createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);
-  return true;
+  return n != 0;
 }
 
 uint64_t
diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic.ll b/llvm/test/Instrumentation/AddressSanitizer/basic.ll
index 068d6d18cd45eba..f9c5b1439de2f9f 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/basic.ll
@@ -210,8 +210,10 @@ define void @test_swifterror_3() sanitize_address {
 
 ;; ctor/dtor have the nounwind attribute. See uwtable.ll, they additionally have
 ;; the uwtable attribute with the module flag "uwtable".
-; CHECK: define internal void @asan.module_ctor() #[[#ATTR:]] {{(comdat )?}}{
+; CHECK: define internal void @asan.module_ctor() #[[#ATTR:]] comdat {
 ; CHECK: call void @__asan_init()
+;; __asan_register_elf_globals is called even if this module does not contain global variables.
+; CHECK: call void @__asan_register_elf_globals(i64 ptrtoint (ptr @___asan_globals_registered to i64), i64 ptrtoint (ptr @__start_asan_globals to i64), i64 ptrtoint (ptr @__stop_asan_globals to i64))
 
 ; CHECK: attributes #[[#ATTR]] = { nounwind }
 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll
index 0e4d1f168d0163f..1617bf9b67aa7c5 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK %s
-; RUN: opt < %s -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefix=CHECK %s
-; RUN: opt < %s -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefix=CHECK %s
-; RUN: opt < %s -passes=asan -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: opt < a.ll -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK %s
+; RUN: opt < a.ll -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefix=CHECK %s
+; RUN: opt < a.ll -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefix=CHECK %s
+; RUN: opt < a.ll -passes=asan -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s
 
+; RUN: opt < empty.ll -passes=asan -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=ELF-NOGC %s
+
+;--- a.ll
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Globals:
@@ -59,3 +63,10 @@ attributes #1 = { nounwind sanitize_address "less-precise-fpmad"="false" "frame-
 !7 = !{!"/tmp/asan-globals.cpp", i32 7, i32 5}
 !8 = !{!"/tmp/asan-globals.cpp", i32 12, i32 14}
 !9 = !{!"/tmp/asan-globals.cpp", i32 14, i32 25}
+
+;; In the presence of instrumented global variables, asan.module_ctor do not use comdat.
+; CHECK: define internal void @asan.module_ctor() #[[#ATTR:]] {
+
+; ELF-NOGC: define internal void @asan.module_ctor() #[[#ATTR:]] comdat {
+
+;--- empty.ll

>From 5d08b2f5cb028b5b3f34d4b5b93b48f7ac5bb39a Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Fri, 29 Sep 2023 10:35:49 -0700
Subject: [PATCH 2/2] Make ModuleAddressSanitizer::InstrumentGlobals return
 void

---
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 758997146d3be95..6ec592f377a8634 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -819,7 +819,7 @@ class ModuleAddressSanitizer {
 private:
   void initializeCallbacks(Module &M);
 
-  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
+  void InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
   void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
                              ArrayRef<GlobalVariable *> ExtendedGlobals,
                              ArrayRef<Constant *> MetadataInitializers);
@@ -2343,7 +2343,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
 // redzones and inserts this function into llvm.global_ctors.
 // Sets *CtorComdat to true if the global registration code emitted into the
 // asan constructor is comdat-compatible.
-bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+void ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
                                                bool *CtorComdat) {
   // Build set of globals that are aliased by some GA, where
   // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable.
@@ -2541,7 +2541,6 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
     createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);
-  return n != 0;
 }
 
 uint64_t



More information about the llvm-commits mailing list