[clang] [llvm] [MTE] Apply alignment / size in AsmPrinter rather than IR (PR #111918)
Florian Mayer via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 15 13:33:47 PDT 2024
https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/111918
>From 3a962270521aa7b48b64e5ac5fa0edb900990023 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 10 Oct 2024 16:05:50 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
clang/lib/CodeGen/SanitizerMetadata.cpp | 45 ++++-
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 7 +-
llvm/lib/Target/AArch64/AArch64.h | 2 -
.../Target/AArch64/AArch64GlobalsTagging.cpp | 155 ------------------
.../Target/AArch64/AArch64TargetMachine.cpp | 2 -
llvm/lib/Target/AArch64/CMakeLists.txt | 1 -
.../llvm/lib/Target/AArch64/BUILD.gn | 1 -
7 files changed, 46 insertions(+), 167 deletions(-)
delete mode 100644 llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 5b212a163611dc..784d9061647f5c 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -34,6 +34,37 @@ static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
return Mask;
}
+static bool shouldTagGlobal(const llvm::GlobalVariable &G) {
+ // For now, don't instrument constant data, as it'll be in .rodata anyway. It
+ // may be worth instrumenting these in future to stop them from being used as
+ // gadgets.
+ if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
+ return false;
+
+ // Globals can be placed implicitly or explicitly in sections. There's two
+ // different types of globals that meet this criteria that cause problems:
+ // 1. Function pointers that are going into various init arrays (either
+ // explicitly through `__attribute__((section(<foo>)))` or implicitly
+ // through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
+ // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
+ // overaligned and overpadded, making iterating over them problematic, and
+ // each function pointer is individually tagged (so the iteration over
+ // them causes SIGSEGV/MTE[AS]ERR).
+ // 2. Global variables put into an explicit section, where the section's name
+ // is a valid C-style identifier. The linker emits a `__start_<name>` and
+ // `__stop_<name>` symbol for the section, so that you can iterate over
+ // globals within this section. Unfortunately, again, these globals would
+ // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
+ //
+ // To mitigate both these cases, and because specifying a section is rare
+ // outside of these two cases, disable MTE protection for globals in any
+ // section.
+ if (G.hasSection())
+ return false;
+
+ return true;
+}
+
void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
SourceLocation Loc, StringRef Name,
QualType Ty,
@@ -60,11 +91,15 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
Meta.NoHWAddress |= CGM.isInNoSanitizeList(
FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
- Meta.Memtag |=
- static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
- Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
- Meta.Memtag &= !CGM.isInNoSanitizeList(
- FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+ if (shouldTagGlobal(*GV)) {
+ Meta.Memtag |=
+ static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
+ Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+ Meta.Memtag &= !CGM.isInNoSanitizeList(
+ FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
+ } else {
+ Meta.Memtag = false;
+ }
Meta.IsDynInit = IsDynInit && !Meta.NoAddress &&
FsanitizeArgument.has(SanitizerKind::Address) &&
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3a8cde7330efc0..6a2817f417d30d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -764,11 +764,16 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
const DataLayout &DL = GV->getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+ if (GV->isTagged()) Size = alignTo(Size, 16);
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- const Align Alignment = getGVAlignment(GV, DL);
+ Align Alignment = getGVAlignment(GV, DL);
+ if (GV->isTagged() && Alignment < 16) {
+ assert(!GV->hasSection());
+ Alignment = Align(16);
+ }
for (auto &Handler : DebugHandlers)
Handler->setSymbolSize(GVSym, Size);
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 62fbf94e803f0c..ffa578d412b3c2 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -72,7 +72,6 @@ FunctionPass *createAArch64PostLegalizerLowering();
FunctionPass *createAArch64PostSelectOptimize();
FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPreRAPass();
-ModulePass *createAArch64GlobalsTaggingPass();
ModulePass *createAArch64Arm64ECCallLoweringPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
@@ -89,7 +88,6 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &);
void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry &);
-void initializeAArch64GlobalsTaggingPass(PassRegistry &);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
deleted file mode 100644
index a49d391d9148c3..00000000000000
--- a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-
-#include <algorithm>
-
-using namespace llvm;
-
-static const Align kTagGranuleSize = Align(16);
-
-static bool shouldTagGlobal(const GlobalVariable &G) {
- // For now, don't instrument constant data, as it'll be in .rodata anyway. It
- // may be worth instrumenting these in future to stop them from being used as
- // gadgets.
- if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
- return false;
-
- // Globals can be placed implicitly or explicitly in sections. There's two
- // different types of globals that meet this criteria that cause problems:
- // 1. Function pointers that are going into various init arrays (either
- // explicitly through `__attribute__((section(<foo>)))` or implicitly
- // through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
- // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
- // overaligned and overpadded, making iterating over them problematic, and
- // each function pointer is individually tagged (so the iteration over
- // them causes SIGSEGV/MTE[AS]ERR).
- // 2. Global variables put into an explicit section, where the section's name
- // is a valid C-style identifier. The linker emits a `__start_<name>` and
- // `__stop_<name>` symbol for the section, so that you can iterate over
- // globals within this section. Unfortunately, again, these globals would
- // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
- //
- // To mitigate both these cases, and because specifying a section is rare
- // outside of these two cases, disable MTE protection for globals in any
- // section.
- if (G.hasSection())
- return false;
-
- return true;
-}
-
-// Technically, due to ELF symbol interposition semantics, we can't change the
-// alignment or size of symbols. If we increase the alignment or size of a
-// symbol, the compiler may make optimisations based on this new alignment or
-// size. If the symbol is interposed, this optimisation could lead to
-// alignment-related or OOB read/write crashes.
-//
-// This is handled in the linker. When the linker sees multiple declarations of
-// a global variable, and some are tagged, and some are untagged, it resolves it
-// to be an untagged definition - but preserves the tag-granule-rounded size and
-// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
-// For cross-DSO, it's been a reasonable contract that if you're interposing a
-// sanitizer-instrumented global, then the interposer also needs to be
-// sanitizer-instrumented.
-//
-// FIXME: In theory, this can be fixed by splitting the size/alignment of
-// globals into two uses: an "output alignment" that's emitted to the ELF file,
-// and an "optimisation alignment" that's used for optimisation. Thus, we could
-// adjust the output alignment only, and still optimise based on the pessimistic
-// pre-tagging size/alignment.
-static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
- Constant *Initializer = G->getInitializer();
- uint64_t SizeInBytes =
- M.getDataLayout().getTypeAllocSize(Initializer->getType());
-
- uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
- if (SizeInBytes != NewSize) {
- // Pad the initializer out to the next multiple of 16 bytes.
- llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
- Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
- Initializer = ConstantStruct::getAnon({Initializer, Padding});
- auto *NewGV = new GlobalVariable(
- M, Initializer->getType(), G->isConstant(), G->getLinkage(),
- Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
- NewGV->copyAttributesFrom(G);
- NewGV->setComdat(G->getComdat());
- NewGV->copyMetadata(G, 0);
-
- NewGV->takeName(G);
- G->replaceAllUsesWith(NewGV);
- G->eraseFromParent();
- G = NewGV;
- }
-
- G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
-
- // Ensure that tagged globals don't get merged by ICF - as they should have
- // different tags at runtime.
- G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
-}
-
-namespace {
-class AArch64GlobalsTagging : public ModulePass {
-public:
- static char ID;
-
- explicit AArch64GlobalsTagging() : ModulePass(ID) {
- initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
-};
-} // anonymous namespace
-
-char AArch64GlobalsTagging::ID = 0;
-
-bool AArch64GlobalsTagging::runOnModule(Module &M) {
- // No mutating the globals in-place, or iterator invalidation occurs.
- SmallVector<GlobalVariable *> GlobalsToTag;
- for (GlobalVariable &G : M.globals()) {
- if (G.isDeclaration() || !G.isTagged())
- continue;
-
- assert(G.hasSanitizerMetadata() &&
- "Missing sanitizer metadata, but symbol is apparently tagged.");
- if (!shouldTagGlobal(G)) {
- GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
- Meta.Memtag = false;
- G.setSanitizerMetadata(Meta);
- assert(!G.isTagged());
- }
- GlobalsToTag.push_back(&G);
- }
-
- for (GlobalVariable *G : GlobalsToTag) {
- tagGlobalDefinition(M, G);
- }
-
- return true;
-}
-
-INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
- "AArch64 Globals Tagging Pass", false, false)
-INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
- "AArch64 Globals Tagging Pass", false, false)
-
-ModulePass *llvm::createAArch64GlobalsTaggingPass() {
- return new AArch64GlobalsTagging();
-}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7b0ae23358673e..b8d737f5beee42 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -269,7 +269,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64StackTaggingPreRAPass(*PR);
initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
initializeAArch64DAGToDAGISelLegacyPass(*PR);
- initializeAArch64GlobalsTaggingPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -632,7 +631,6 @@ void AArch64PassConfig::addIRPasses() {
if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
addPass(createSelectOptimizePass());
- addPass(createAArch64GlobalsTaggingPass());
addPass(createAArch64StackTaggingPass(
/*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index da13db8e68b0e6..2300e479bc1106 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -57,7 +57,6 @@ add_llvm_target(AArch64CodeGen
AArch64FastISel.cpp
AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
- AArch64GlobalsTagging.cpp
AArch64CompressJumpTables.cpp
AArch64ConditionOptimizer.cpp
AArch64RedundantCopyElimination.cpp
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index 57570de8813751..6ef0bc7a7d67a1 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -126,7 +126,6 @@ static_library("LLVMAArch64CodeGen") {
"AArch64FalkorHWPFFix.cpp",
"AArch64FastISel.cpp",
"AArch64FrameLowering.cpp",
- "AArch64GlobalsTagging.cpp",
"AArch64ISelDAGToDAG.cpp",
"AArch64ISelLowering.cpp",
"AArch64InstrInfo.cpp",
>From 92c06a480abc8529ce2e59fbfc202e115b526119 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 10 Oct 2024 17:06:26 -0700
Subject: [PATCH 2/4] fmt
Created using spr 1.3.4
---
clang/lib/CodeGen/SanitizerMetadata.cpp | 4 ++--
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 784d9061647f5c..95e3f8a01f14bc 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -92,8 +92,8 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
if (shouldTagGlobal(*GV)) {
- Meta.Memtag |=
- static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
+ Meta.Memtag |= static_cast<bool>(FsanitizeArgument.Mask &
+ SanitizerKind::MemtagGlobals);
Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
Meta.Memtag &= !CGM.isInNoSanitizeList(
FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6a2817f417d30d..aade8e1368ee8c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -764,7 +764,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
const DataLayout &DL = GV->getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
- if (GV->isTagged()) Size = alignTo(Size, 16);
+ if (GV->isTagged())
+ Size = alignTo(Size, 16);
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
>From ca42ec8a3d859ad3840a262b95add2b43a415ce5 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Fri, 11 Oct 2024 11:27:14 -0700
Subject: [PATCH 3/4] test
Created using spr 1.3.4
---
llvm/test/CodeGen/AArch64/O0-pipeline.ll | 2 --
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 -
2 files changed, 3 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index e01df9ea03e78b..0d079881cb909c 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -25,8 +25,6 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
-; CHECK-NEXT: AArch64 Globals Tagging
-; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index fb94c040ae341a..84ed0d61fceab4 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -72,7 +72,6 @@
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Optimize selects
-; CHECK-NEXT: AArch64 Globals Tagging
; CHECK-NEXT: Stack Safety Analysis
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
>From b8029b2c8d79cef7fab8156b9d4e00c1bc147e85 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Fri, 11 Oct 2024 14:12:58 -0700
Subject: [PATCH 4/4] test
Created using spr 1.3.4
---
clang/test/CodeGen/memtag-globals.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/memtag-globals.cpp b/clang/test/CodeGen/memtag-globals.cpp
index b4f5dc0d7dcf04..a721ac6ce3345f 100644
--- a/clang/test/CodeGen/memtag-globals.cpp
+++ b/clang/test/CodeGen/memtag-globals.cpp
@@ -8,6 +8,7 @@
// RUN: FileCheck %s --check-prefix=IGNORELIST
int global;
+int __attribute__((__section__("my_section"))) section_global;
int __attribute__((no_sanitize("memtag"))) attributed_global;
int __attribute__((disable_sanitizer_instrumentation)) disable_instrumentation_global;
int ignorelisted_global;
@@ -22,6 +23,8 @@ void func() {
// CHECK: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
// CHECK: @{{.*}}global{{.*}} ={{.*}} sanitize_memtag
+// CHECK: @{{.*}}section_global{{.*}} =
+// CHECK-NOT: sanitize_memtag
// CHECK: @{{.*}}attributed_global{{.*}} =
// CHECK-NOT: sanitize_memtag
// CHECK: @{{.*}}disable_instrumentation_global{{.*}} =
@@ -30,7 +33,7 @@ void func() {
// CHECK-NOT: sanitize_memtag
// CHECK: @{{.*}}static_var{{.*}} ={{.*}} sanitize_memtag
-// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}} sanitize_memtag
+// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}}
// CHECK: @{{.*}}external_global{{.*}} ={{.*}} sanitize_memtag
// IGNORELIST: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
More information about the cfe-commits
mailing list