[clang] [llvm] [MTE] Apply alignment / size in AsmPrinter rather than IR (PR #111918)

via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 14 17:21:19 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-backend-aarch64

Author: Florian Mayer (fmayer)

<details>
<summary>Changes</summary>

This greatly simplifies the code, and makes sure no optimizations are
applied that assume the bigger alignment or size, which could be
incorrect if we link together with non-instrumented code.


---
Full diff: https://github.com/llvm/llvm-project/pull/111918.diff


10 Files Affected:

- (modified) clang/lib/CodeGen/SanitizerMetadata.cpp (+40-5) 
- (modified) clang/test/CodeGen/memtag-globals.cpp (+4-1) 
- (modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+7-1) 
- (modified) llvm/lib/Target/AArch64/AArch64.h (-2) 
- (removed) llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp (-155) 
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (-2) 
- (modified) llvm/lib/Target/AArch64/CMakeLists.txt (-1) 
- (modified) llvm/test/CodeGen/AArch64/O0-pipeline.ll (-2) 
- (modified) llvm/test/CodeGen/AArch64/O3-pipeline.ll (-1) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn (-1) 


``````````diff
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 5b212a163611dc..95e3f8a01f14bc 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -34,6 +34,37 @@ static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
   return Mask;
 }
 
+static bool shouldTagGlobal(const llvm::GlobalVariable &G) {
+  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
+  // may be worth instrumenting these in future to stop them from being used as
+  // gadgets.
+  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
+    return false;
+
+  // Globals can be placed implicitly or explicitly in sections. There's two
+  // different types of globals that meet this criteria that cause problems:
+  //  1. Function pointers that are going into various init arrays (either
+  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
+  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
+  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
+  //     overaligned and overpadded, making iterating over them problematic, and
+  //     each function pointer is individually tagged (so the iteration over
+  //     them causes SIGSEGV/MTE[AS]ERR).
+  //  2. Global variables put into an explicit section, where the section's name
+  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
+  //     `__stop_<name>` symbol for the section, so that you can iterate over
+  //     globals within this section. Unfortunately, again, these globals would
+  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
+  //
+  // To mitigate both these cases, and because specifying a section is rare
+  // outside of these two cases, disable MTE protection for globals in any
+  // section.
+  if (G.hasSection())
+    return false;
+
+  return true;
+}
+
 void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
                                      SourceLocation Loc, StringRef Name,
                                      QualType Ty,
@@ -60,11 +91,15 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
   Meta.NoHWAddress |= CGM.isInNoSanitizeList(
       FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
 
-  Meta.Memtag |=
-      static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
-  Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
-  Meta.Memtag &= !CGM.isInNoSanitizeList(
-      FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+  if (shouldTagGlobal(*GV)) {
+    Meta.Memtag |= static_cast<bool>(FsanitizeArgument.Mask &
+                                     SanitizerKind::MemtagGlobals);
+    Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+    Meta.Memtag &= !CGM.isInNoSanitizeList(
+        FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+  } else {
+    Meta.Memtag = false;
+  }
 
   Meta.IsDynInit = IsDynInit && !Meta.NoAddress &&
                    FsanitizeArgument.has(SanitizerKind::Address) &&
diff --git a/clang/test/CodeGen/memtag-globals.cpp b/clang/test/CodeGen/memtag-globals.cpp
index b4f5dc0d7dcf04..a721ac6ce3345f 100644
--- a/clang/test/CodeGen/memtag-globals.cpp
+++ b/clang/test/CodeGen/memtag-globals.cpp
@@ -8,6 +8,7 @@
 // RUN:   FileCheck %s --check-prefix=IGNORELIST
 
 int global;
+int __attribute__((__section__("my_section"))) section_global;
 int __attribute__((no_sanitize("memtag"))) attributed_global;
 int __attribute__((disable_sanitizer_instrumentation)) disable_instrumentation_global;
 int ignorelisted_global;
@@ -22,6 +23,8 @@ void func() {
 // CHECK: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
 // CHECK: @{{.*}}global{{.*}} ={{.*}} sanitize_memtag
 
+// CHECK:     @{{.*}}section_global{{.*}} =
+// CHECK-NOT: sanitize_memtag
 // CHECK:     @{{.*}}attributed_global{{.*}} =
 // CHECK-NOT: sanitize_memtag
 // CHECK:     @{{.*}}disable_instrumentation_global{{.*}} =
@@ -30,7 +33,7 @@ void func() {
 // CHECK-NOT: sanitize_memtag
 
 // CHECK: @{{.*}}static_var{{.*}} ={{.*}} sanitize_memtag
-// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}} sanitize_memtag
+// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}}
 // CHECK: @{{.*}}external_global{{.*}} ={{.*}} sanitize_memtag
 
 // IGNORELIST: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3a8cde7330efc0..aade8e1368ee8c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -764,11 +764,17 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
 
   const DataLayout &DL = GV->getDataLayout();
   uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+  if (GV->isTagged())
+    Size = alignTo(Size, 16);
 
   // If the alignment is specified, we *must* obey it.  Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
   // sections and expected to be contiguous (e.g. ObjC metadata).
-  const Align Alignment = getGVAlignment(GV, DL);
+  Align Alignment = getGVAlignment(GV, DL);
+  if (GV->isTagged() && Alignment < 16) {
+    assert(!GV->hasSection());
+    Alignment = Align(16);
+  }
 
   for (auto &Handler : DebugHandlers)
     Handler->setSymbolSize(GVSym, Size);
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 62fbf94e803f0c..ffa578d412b3c2 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -72,7 +72,6 @@ FunctionPass *createAArch64PostLegalizerLowering();
 FunctionPass *createAArch64PostSelectOptimize();
 FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
 FunctionPass *createAArch64StackTaggingPreRAPass();
-ModulePass *createAArch64GlobalsTaggingPass();
 ModulePass *createAArch64Arm64ECCallLoweringPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
@@ -89,7 +88,6 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &);
 void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
 void initializeAArch64ExpandPseudoPass(PassRegistry &);
-void initializeAArch64GlobalsTaggingPass(PassRegistry &);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
 void initializeAArch64MIPeepholeOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
deleted file mode 100644
index a49d391d9148c3..00000000000000
--- a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-
-#include <algorithm>
-
-using namespace llvm;
-
-static const Align kTagGranuleSize = Align(16);
-
-static bool shouldTagGlobal(const GlobalVariable &G) {
-  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
-  // may be worth instrumenting these in future to stop them from being used as
-  // gadgets.
-  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
-    return false;
-
-  // Globals can be placed implicitly or explicitly in sections. There's two
-  // different types of globals that meet this criteria that cause problems:
-  //  1. Function pointers that are going into various init arrays (either
-  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
-  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
-  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
-  //     overaligned and overpadded, making iterating over them problematic, and
-  //     each function pointer is individually tagged (so the iteration over
-  //     them causes SIGSEGV/MTE[AS]ERR).
-  //  2. Global variables put into an explicit section, where the section's name
-  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
-  //     `__stop_<name>` symbol for the section, so that you can iterate over
-  //     globals within this section. Unfortunately, again, these globals would
-  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
-  //
-  // To mitigate both these cases, and because specifying a section is rare
-  // outside of these two cases, disable MTE protection for globals in any
-  // section.
-  if (G.hasSection())
-    return false;
-
-  return true;
-}
-
-// Technically, due to ELF symbol interposition semantics, we can't change the
-// alignment or size of symbols. If we increase the alignment or size of a
-// symbol, the compiler may make optimisations based on this new alignment or
-// size. If the symbol is interposed, this optimisation could lead to
-// alignment-related or OOB read/write crashes.
-//
-// This is handled in the linker. When the linker sees multiple declarations of
-// a global variable, and some are tagged, and some are untagged, it resolves it
-// to be an untagged definition - but preserves the tag-granule-rounded size and
-// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
-// For cross-DSO, it's been a reasonable contract that if you're interposing a
-// sanitizer-instrumented global, then the interposer also needs to be
-// sanitizer-instrumented.
-//
-// FIXME: In theory, this can be fixed by splitting the size/alignment of
-// globals into two uses: an "output alignment" that's emitted to the ELF file,
-// and an "optimisation alignment" that's used for optimisation. Thus, we could
-// adjust the output alignment only, and still optimise based on the pessimistic
-// pre-tagging size/alignment.
-static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
-  Constant *Initializer = G->getInitializer();
-  uint64_t SizeInBytes =
-      M.getDataLayout().getTypeAllocSize(Initializer->getType());
-
-  uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
-  if (SizeInBytes != NewSize) {
-    // Pad the initializer out to the next multiple of 16 bytes.
-    llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
-    Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
-    Initializer = ConstantStruct::getAnon({Initializer, Padding});
-    auto *NewGV = new GlobalVariable(
-        M, Initializer->getType(), G->isConstant(), G->getLinkage(),
-        Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
-    NewGV->copyAttributesFrom(G);
-    NewGV->setComdat(G->getComdat());
-    NewGV->copyMetadata(G, 0);
-
-    NewGV->takeName(G);
-    G->replaceAllUsesWith(NewGV);
-    G->eraseFromParent();
-    G = NewGV;
-  }
-
-  G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
-
-  // Ensure that tagged globals don't get merged by ICF - as they should have
-  // different tags at runtime.
-  G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
-}
-
-namespace {
-class AArch64GlobalsTagging : public ModulePass {
-public:
-  static char ID;
-
-  explicit AArch64GlobalsTagging() : ModulePass(ID) {
-    initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnModule(Module &M) override;
-
-  StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
-};
-} // anonymous namespace
-
-char AArch64GlobalsTagging::ID = 0;
-
-bool AArch64GlobalsTagging::runOnModule(Module &M) {
-  // No mutating the globals in-place, or iterator invalidation occurs.
-  SmallVector<GlobalVariable *> GlobalsToTag;
-  for (GlobalVariable &G : M.globals()) {
-    if (G.isDeclaration() || !G.isTagged())
-      continue;
-
-    assert(G.hasSanitizerMetadata() &&
-           "Missing sanitizer metadata, but symbol is apparently tagged.");
-    if (!shouldTagGlobal(G)) {
-      GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
-      Meta.Memtag = false;
-      G.setSanitizerMetadata(Meta);
-      assert(!G.isTagged());
-    }
-    GlobalsToTag.push_back(&G);
-  }
-
-  for (GlobalVariable *G : GlobalsToTag) {
-    tagGlobalDefinition(M, G);
-  }
-
-  return true;
-}
-
-INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
-                      "AArch64 Globals Tagging Pass", false, false)
-INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
-                    "AArch64 Globals Tagging Pass", false, false)
-
-ModulePass *llvm::createAArch64GlobalsTaggingPass() {
-  return new AArch64GlobalsTagging();
-}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7b0ae23358673e..b8d737f5beee42 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -269,7 +269,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64StackTaggingPreRAPass(*PR);
   initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
   initializeAArch64DAGToDAGISelLegacyPass(*PR);
-  initializeAArch64GlobalsTaggingPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -632,7 +631,6 @@ void AArch64PassConfig::addIRPasses() {
   if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
     addPass(createSelectOptimizePass());
 
-  addPass(createAArch64GlobalsTaggingPass());
   addPass(createAArch64StackTaggingPass(
       /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
 
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index da13db8e68b0e6..2300e479bc1106 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -57,7 +57,6 @@ add_llvm_target(AArch64CodeGen
   AArch64FastISel.cpp
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
-  AArch64GlobalsTagging.cpp
   AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index e01df9ea03e78b..0d079881cb909c 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -25,8 +25,6 @@
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
-; CHECK-NEXT:     AArch64 Globals Tagging
-; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       Lazy Branch Probability Analysis
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index fb94c040ae341a..84ed0d61fceab4 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -72,7 +72,6 @@
 ; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Optimize selects
-; CHECK-NEXT:     AArch64 Globals Tagging
 ; CHECK-NEXT:     Stack Safety Analysis
 ; CHECK-NEXT:       FunctionPass Manager
 ; CHECK-NEXT:         Dominator Tree Construction
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index 57570de8813751..6ef0bc7a7d67a1 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -126,7 +126,6 @@ static_library("LLVMAArch64CodeGen") {
     "AArch64FalkorHWPFFix.cpp",
     "AArch64FastISel.cpp",
     "AArch64FrameLowering.cpp",
-    "AArch64GlobalsTagging.cpp",
     "AArch64ISelDAGToDAG.cpp",
     "AArch64ISelLowering.cpp",
     "AArch64InstrInfo.cpp",

``````````

</details>


https://github.com/llvm/llvm-project/pull/111918


More information about the cfe-commits mailing list