[clang] [llvm] [MTE] decide whether to tag global in AsmPrinter (PR #135891)

Florian Mayer via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 13:27:18 PDT 2025


https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/135891

>From cf7c14e486c47c03560ea4b3c6fe012bf1c7de17 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Tue, 15 Apr 2025 17:36:37 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 clang/lib/CodeGen/SanitizerMetadata.cpp    | 45 +++------------------
 clang/test/CodeGen/memtag-globals.cpp      |  7 ++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 46 ++++++++++++++++++++++
 llvm/lib/IR/Verifier.cpp                   |  4 --
 llvm/unittests/IR/VerifierTest.cpp         | 21 ----------
 5 files changed, 55 insertions(+), 68 deletions(-)

diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index b7b212ba46efd..288f7f6415f44 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -32,37 +32,6 @@ static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
   return Mask;
 }
 
-static bool shouldTagGlobal(const llvm::GlobalVariable &G) {
-  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
-  // may be worth instrumenting these in future to stop them from being used as
-  // gadgets.
-  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
-    return false;
-
-  // Globals can be placed implicitly or explicitly in sections. There's two
-  // different types of globals that meet this criteria that cause problems:
-  //  1. Function pointers that are going into various init arrays (either
-  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
-  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
-  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
-  //     overaligned and overpadded, making iterating over them problematic, and
-  //     each function pointer is individually tagged (so the iteration over
-  //     them causes SIGSEGV/MTE[AS]ERR).
-  //  2. Global variables put into an explicit section, where the section's name
-  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
-  //     `__stop_<name>` symbol for the section, so that you can iterate over
-  //     globals within this section. Unfortunately, again, these globals would
-  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
-  //
-  // To mitigate both these cases, and because specifying a section is rare
-  // outside of these two cases, disable MTE protection for globals in any
-  // section.
-  if (G.hasSection())
-    return false;
-
-  return true;
-}
-
 void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
                                      SourceLocation Loc, StringRef Name,
                                      QualType Ty,
@@ -89,15 +58,11 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
   Meta.NoHWAddress |= CGM.isInNoSanitizeList(
       FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty);
 
-  if (shouldTagGlobal(*GV)) {
-    Meta.Memtag |= static_cast<bool>(FsanitizeArgument.Mask &
-                                     SanitizerKind::MemtagGlobals);
-    Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
-    Meta.Memtag &= !CGM.isInNoSanitizeList(
-        FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
-  } else {
-    Meta.Memtag = false;
-  }
+  Meta.Memtag |=
+      static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals);
+  Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag);
+  Meta.Memtag &= !CGM.isInNoSanitizeList(
+      FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty);
 
   Meta.IsDynInit = IsDynInit && !Meta.NoAddress &&
                    FsanitizeArgument.has(SanitizerKind::Address) &&
diff --git a/clang/test/CodeGen/memtag-globals.cpp b/clang/test/CodeGen/memtag-globals.cpp
index 0d7fe22a7b0f0..407eea1c37cfa 100644
--- a/clang/test/CodeGen/memtag-globals.cpp
+++ b/clang/test/CodeGen/memtag-globals.cpp
@@ -25,8 +25,9 @@ void func() {
 // CHECK: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
 // CHECK: @{{.*}}global{{.*}} ={{.*}} sanitize_memtag
 
-// CHECK:     @{{.*}}section_global{{.*}} =
-// CHECK-NOT: sanitize_memtag
+// This DOES NOT mean we are instrumenting the section global,
+// but we are ignoring it in AsmPrinter rather than in clang.
+// CHECK:     @{{.*}}section_global{{.*}} ={{.*}} sanitize_memtag
 // CHECK:     @{{.*}}attributed_global{{.*}} =
 // CHECK-NOT: sanitize_memtag
 // CHECK:     @{{.*}}disable_instrumentation_global{{.*}} =
@@ -35,7 +36,7 @@ void func() {
 // CHECK-NOT: sanitize_memtag
 
 // CHECK: @{{.*}}static_var{{.*}} ={{.*}} sanitize_memtag
-// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}}
+// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}} sanitize_memtag
 // CHECK: @{{.*}}external_global{{.*}} ={{.*}} sanitize_memtag
 
 // IGNORELIST: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0deaf94502b11..8e79f87bb6cb5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2398,6 +2398,41 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
   OutStreamer->emitBinaryData(Buf);
 }
 
+static bool shouldTagGlobal(const llvm::GlobalVariable &G) {
+  // We used to do this in clang, but there are optimization passes that turn
+  // non-constant globals into constants. So now, clang only tells us whether
+  // it would *like* a global to be tagged, but we still make the decision here.
+  //
+  // For now, don't instrument constant data, as it'll be in .rodata anyway. It
+  // may be worth instrumenting these in future to stop them from being used as
+  // gadgets.
+  if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant())
+    return false;
+
+  // Globals can be placed implicitly or explicitly in sections. There's two
+  // different types of globals that meet this criteria that cause problems:
+  //  1. Function pointers that are going into various init arrays (either
+  //     explicitly through `__attribute__((section(<foo>)))` or implicitly
+  //     through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
+  //     ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
+  //     overaligned and overpadded, making iterating over them problematic, and
+  //     each function pointer is individually tagged (so the iteration over
+  //     them causes SIGSEGV/MTE[AS]ERR).
+  //  2. Global variables put into an explicit section, where the section's name
+  //     is a valid C-style identifier. The linker emits a `__start_<name>` and
+  //     `__stop_<name>` symbol for the section, so that you can iterate over
+  //     globals within this section. Unfortunately, again, these globals would
+  //     be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
+  //
+  // To mitigate both these cases, and because specifying a section is rare
+  // outside of these two cases, disable MTE protection for globals in any
+  // section.
+  if (G.hasSection())
+    return false;
+
+  return true;
+}
+
 static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
   Constant *Initializer = G->getInitializer();
   uint64_t SizeInBytes =
@@ -2430,6 +2465,12 @@ static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
   G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
 }
 
+static void removeMemtagFromGlobal(GlobalVariable &G) {
+  auto Meta = G.getSanitizerMetadata();
+  Meta.Memtag = false;
+  G.setSanitizerMetadata(Meta);
+}
+
 bool AsmPrinter::doFinalization(Module &M) {
   // Set the MachineFunction to nullptr so that we can catch attempted
   // accesses to MF specific features at the module level and so that
@@ -2440,6 +2481,11 @@ bool AsmPrinter::doFinalization(Module &M) {
   for (GlobalVariable &G : M.globals()) {
     if (G.isDeclaration() || !G.isTagged())
       continue;
+    if (!shouldTagGlobal(G)) {
+      assert(G.hasSanitizerMetadata()); // because isTagged.
+      removeMemtagFromGlobal(G);
+      continue;
+    }
     GlobalsToTag.push_back(&G);
   }
   for (GlobalVariable *G : GlobalsToTag)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 7423e746dfa9a..b5bf286a65ce5 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -808,10 +808,6 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
           "visibility must be dso_local!",
           &GV);
 
-  if (GV.isTagged()) {
-    Check(!GV.hasSection(), "tagged GlobalValue must not be in section.", &GV);
-  }
-
   forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool {
     if (const Instruction *I = dyn_cast<Instruction>(V)) {
       if (!I->getParent() || !I->getParent()->getParent())
diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp
index 2b51f3c4ea2c2..7a136e6a382a7 100644
--- a/llvm/unittests/IR/VerifierTest.cpp
+++ b/llvm/unittests/IR/VerifierTest.cpp
@@ -416,26 +416,5 @@ TEST(VerifierTest, GetElementPtrInst) {
       << Error;
 }
 
-TEST(VerifierTest, DetectTaggedGlobalInSection) {
-  LLVMContext C;
-  Module M("M", C);
-  GlobalVariable *GV = new GlobalVariable(
-      Type::getInt64Ty(C), false, GlobalValue::InternalLinkage,
-      ConstantInt::get(Type::getInt64Ty(C), 1));
-  GV->setDSOLocal(true);
-  GlobalValue::SanitizerMetadata MD{};
-  MD.Memtag = true;
-  GV->setSanitizerMetadata(MD);
-  GV->setSection("foo");
-  M.insertGlobalVariable(GV);
-
-  std::string Error;
-  raw_string_ostream ErrorOS(Error);
-  EXPECT_TRUE(verifyModule(M, &ErrorOS));
-  EXPECT_TRUE(
-      StringRef(Error).starts_with("tagged GlobalValue must not be in section"))
-      << Error;
-}
-
 } // end anonymous namespace
 } // end namespace llvm

>From 1ab14cff209e73e09c69a4155ffc436780aee63e Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Wed, 16 Apr 2025 13:27:04 -0700
Subject: [PATCH 2/2] upd

Created using spr 1.3.4
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8e79f87bb6cb5..ad34465e2c606 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2484,6 +2484,7 @@ bool AsmPrinter::doFinalization(Module &M) {
     if (!shouldTagGlobal(G)) {
       assert(G.hasSanitizerMetadata()); // because isTagged.
       removeMemtagFromGlobal(G);
+      assert(!G.isTagged());
       continue;
     }
     GlobalsToTag.push_back(&G);



More information about the llvm-commits mailing list