[clang] [llvm] Apply alignment / size in linker rather than IR (PR #111918)

Florian Mayer via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 10 16:06:08 PDT 2024


https://github.com/fmayer created https://github.com/llvm/llvm-project/pull/111918

This greatly simplifies the code, and makes sure no optimizations are
applied that assume the bigger alignment or size, which could be
incorrect if we link together with non-instrumented code.


>From 3a962270521aa7b48b64e5ac5fa0edb900990023 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 10 Oct 2024 16:05:50 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 clang/lib/CodeGen/SanitizerMetadata.cpp       |  45 ++++-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |   7 +-
 llvm/lib/Target/AArch64/AArch64.h             |   2 -
 .../Target/AArch64/AArch64GlobalsTagging.cpp  | 155 ------------------
 .../Target/AArch64/AArch64TargetMachine.cpp   |   2 -
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 -
 .../llvm/lib/Target/AArch64/BUILD.gn          |   1 -
 7 files changed, 46 insertions(+), 167 deletions(-)
 delete mode 100644 llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp

diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 5b212a163611dc..784d9061647f5c 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -34,6 +34,37 @@ static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
   return Mask;
 }
 
+static bool shouldTagGlobal(const llvm::GlobalVariable &G) {
+  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
+  // may be worth instrumenting these in future to stop them from being used as
+  // gadgets.
+  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
+    return false;
+
+  // Globals can be placed implicitly or explicitly in sections. There's two
+  // different types of globals that meet this criteria that cause problems:
+  //  1. Function pointers that are going into various init arrays (either
+  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
+  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
+  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
+  //     overaligned and overpadded, making iterating over them problematic, and
+  //     each function pointer is individually tagged (so the iteration over
+  //     them causes SIGSEGV/MTE[AS]ERR).
+  //  2. Global variables put into an explicit section, where the section's name
+  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
+  //     `__stop_<name>` symbol for the section, so that you can iterate over
+  //     globals within this section. Unfortunately, again, these globals would
+  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
+  //
+  // To mitigate both these cases, and because specifying a section is rare
+  // outside of these two cases, disable MTE protection for globals in any
+  // section.
+  if (G.hasSection())
+    return false;
+
+  return true;
+}
+
 void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
                                      SourceLocation Loc, StringRef Name,
                                      QualType Ty,
@@ -60,11 +91,15 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
   Meta.NoHWAddress |= CGM.isInNoSanitizeList(
       FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
 
-  Meta.Memtag |=
-      static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
-  Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
-  Meta.Memtag &= !CGM.isInNoSanitizeList(
-      FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+  if (shouldTagGlobal(*GV)) {
+    Meta.Memtag |=
+        static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
+    Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+    Meta.Memtag &= !CGM.isInNoSanitizeList(
+        FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+  } else {
+    Meta.Memtag = false;
+  }
 
   Meta.IsDynInit = IsDynInit && !Meta.NoAddress &&
                    FsanitizeArgument.has(SanitizerKind::Address) &&
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3a8cde7330efc0..6a2817f417d30d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -764,11 +764,16 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
 
   const DataLayout &DL = GV->getDataLayout();
   uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+  if (GV->isTagged()) Size = alignTo(Size, 16);
 
   // If the alignment is specified, we *must* obey it.  Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
   // sections and expected to be contiguous (e.g. ObjC metadata).
-  const Align Alignment = getGVAlignment(GV, DL);
+  Align Alignment = getGVAlignment(GV, DL);
+  if (GV->isTagged() && Alignment < 16) {
+    assert(!GV->hasSection());
+    Alignment = Align(16);
+  }
 
   for (auto &Handler : DebugHandlers)
     Handler->setSymbolSize(GVSym, Size);
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 62fbf94e803f0c..ffa578d412b3c2 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -72,7 +72,6 @@ FunctionPass *createAArch64PostLegalizerLowering();
 FunctionPass *createAArch64PostSelectOptimize();
 FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
 FunctionPass *createAArch64StackTaggingPreRAPass();
-ModulePass *createAArch64GlobalsTaggingPass();
 ModulePass *createAArch64Arm64ECCallLoweringPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
@@ -89,7 +88,6 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &);
 void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
 void initializeAArch64ExpandPseudoPass(PassRegistry &);
-void initializeAArch64GlobalsTaggingPass(PassRegistry &);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
 void initializeAArch64MIPeepholeOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
deleted file mode 100644
index a49d391d9148c3..00000000000000
--- a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-
-#include <algorithm>
-
-using namespace llvm;
-
-static const Align kTagGranuleSize = Align(16);
-
-static bool shouldTagGlobal(const GlobalVariable &G) {
-  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
-  // may be worth instrumenting these in future to stop them from being used as
-  // gadgets.
-  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
-    return false;
-
-  // Globals can be placed implicitly or explicitly in sections. There's two
-  // different types of globals that meet this criteria that cause problems:
-  //  1. Function pointers that are going into various init arrays (either
-  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
-  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
-  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
-  //     overaligned and overpadded, making iterating over them problematic, and
-  //     each function pointer is individually tagged (so the iteration over
-  //     them causes SIGSEGV/MTE[AS]ERR).
-  //  2. Global variables put into an explicit section, where the section's name
-  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
-  //     `__stop_<name>` symbol for the section, so that you can iterate over
-  //     globals within this section. Unfortunately, again, these globals would
-  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
-  //
-  // To mitigate both these cases, and because specifying a section is rare
-  // outside of these two cases, disable MTE protection for globals in any
-  // section.
-  if (G.hasSection())
-    return false;
-
-  return true;
-}
-
-// Technically, due to ELF symbol interposition semantics, we can't change the
-// alignment or size of symbols. If we increase the alignment or size of a
-// symbol, the compiler may make optimisations based on this new alignment or
-// size. If the symbol is interposed, this optimisation could lead to
-// alignment-related or OOB read/write crashes.
-//
-// This is handled in the linker. When the linker sees multiple declarations of
-// a global variable, and some are tagged, and some are untagged, it resolves it
-// to be an untagged definition - but preserves the tag-granule-rounded size and
-// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
-// For cross-DSO, it's been a reasonable contract that if you're interposing a
-// sanitizer-instrumented global, then the interposer also needs to be
-// sanitizer-instrumented.
-//
-// FIXME: In theory, this can be fixed by splitting the size/alignment of
-// globals into two uses: an "output alignment" that's emitted to the ELF file,
-// and an "optimisation alignment" that's used for optimisation. Thus, we could
-// adjust the output alignment only, and still optimise based on the pessimistic
-// pre-tagging size/alignment.
-static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
-  Constant *Initializer = G->getInitializer();
-  uint64_t SizeInBytes =
-      M.getDataLayout().getTypeAllocSize(Initializer->getType());
-
-  uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
-  if (SizeInBytes != NewSize) {
-    // Pad the initializer out to the next multiple of 16 bytes.
-    llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
-    Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
-    Initializer = ConstantStruct::getAnon({Initializer, Padding});
-    auto *NewGV = new GlobalVariable(
-        M, Initializer->getType(), G->isConstant(), G->getLinkage(),
-        Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
-    NewGV->copyAttributesFrom(G);
-    NewGV->setComdat(G->getComdat());
-    NewGV->copyMetadata(G, 0);
-
-    NewGV->takeName(G);
-    G->replaceAllUsesWith(NewGV);
-    G->eraseFromParent();
-    G = NewGV;
-  }
-
-  G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
-
-  // Ensure that tagged globals don't get merged by ICF - as they should have
-  // different tags at runtime.
-  G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
-}
-
-namespace {
-class AArch64GlobalsTagging : public ModulePass {
-public:
-  static char ID;
-
-  explicit AArch64GlobalsTagging() : ModulePass(ID) {
-    initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnModule(Module &M) override;
-
-  StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
-};
-} // anonymous namespace
-
-char AArch64GlobalsTagging::ID = 0;
-
-bool AArch64GlobalsTagging::runOnModule(Module &M) {
-  // No mutating the globals in-place, or iterator invalidation occurs.
-  SmallVector<GlobalVariable *> GlobalsToTag;
-  for (GlobalVariable &G : M.globals()) {
-    if (G.isDeclaration() || !G.isTagged())
-      continue;
-
-    assert(G.hasSanitizerMetadata() &&
-           "Missing sanitizer metadata, but symbol is apparently tagged.");
-    if (!shouldTagGlobal(G)) {
-      GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
-      Meta.Memtag = false;
-      G.setSanitizerMetadata(Meta);
-      assert(!G.isTagged());
-    }
-    GlobalsToTag.push_back(&G);
-  }
-
-  for (GlobalVariable *G : GlobalsToTag) {
-    tagGlobalDefinition(M, G);
-  }
-
-  return true;
-}
-
-INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
-                      "AArch64 Globals Tagging Pass", false, false)
-INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
-                    "AArch64 Globals Tagging Pass", false, false)
-
-ModulePass *llvm::createAArch64GlobalsTaggingPass() {
-  return new AArch64GlobalsTagging();
-}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7b0ae23358673e..b8d737f5beee42 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -269,7 +269,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64StackTaggingPreRAPass(*PR);
   initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
   initializeAArch64DAGToDAGISelLegacyPass(*PR);
-  initializeAArch64GlobalsTaggingPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -632,7 +631,6 @@ void AArch64PassConfig::addIRPasses() {
   if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
     addPass(createSelectOptimizePass());
 
-  addPass(createAArch64GlobalsTaggingPass());
   addPass(createAArch64StackTaggingPass(
       /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
 
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index da13db8e68b0e6..2300e479bc1106 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -57,7 +57,6 @@ add_llvm_target(AArch64CodeGen
   AArch64FastISel.cpp
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
-  AArch64GlobalsTagging.cpp
   AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index 57570de8813751..6ef0bc7a7d67a1 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -126,7 +126,6 @@ static_library("LLVMAArch64CodeGen") {
     "AArch64FalkorHWPFFix.cpp",
     "AArch64FastISel.cpp",
     "AArch64FrameLowering.cpp",
-    "AArch64GlobalsTagging.cpp",
     "AArch64ISelDAGToDAG.cpp",
     "AArch64ISelLowering.cpp",
     "AArch64InstrInfo.cpp",



More information about the cfe-commits mailing list