[llvm] [SystemZ] Add proper mcount handling (PR #135767)

Dominik Steenken via llvm-commits llvm-commits at lists.llvm.org
Fri May 2 02:18:40 PDT 2025


https://github.com/dominik-steenken updated https://github.com/llvm/llvm-project/pull/135767

>From cab34b13816487590b7f59960e57db049d18d27e Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 11 Apr 2025 10:09:01 +0200
Subject: [PATCH 1/2] [SystemZ] Add proper mcount handling When compiling with
 `-pg`, the `EntryExitInstrumenterPass` will insert calls to the glibc
 function `mcount` at the beginning of each `MachineFunction`.

On SystemZ, these calls require special handling:

- The call to `mcount` needs to happen at the beginning of the prologue.
- Prior to the call to `mcount`, register `%r14`, the return address of the
  callee function, must be stored 8 bytes above the stack pointer `%r15`.
  After the call to `mcount` returns, that register needs to be restored.

This commit adds some special handling to the EntryExitInstrumenterPass that
keeps the insertion of the mcount function into the module, but skips over
insertion of the actual call in order to perform this insertion in the
`emitPrologue` function. There, a simple sequence of store/call/load is
inserted, which implements the above.
---
 .../Target/SystemZ/SystemZFrameLowering.cpp   | 27 ++++++++++++++
 .../Utils/EntryExitInstrumenter.cpp           |  6 ++++
 llvm/test/CodeGen/SystemZ/mcount.ll           | 35 +++++++++++++++++++
 3 files changed, 68 insertions(+)
 create mode 100644 llvm/test/CodeGen/SystemZ/mcount.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 9561ea544b270..c99faa45d7dc6 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -558,6 +559,32 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
   // to determine the end of the prologue.
   DebugLoc DL;
 
+  // Add mcount instrumentation if necessary.
+  if (MF.getFunction().getFnAttribute("systemz-backend").getValueAsString() ==
+      "insert-mcount") {
+
+    // Store return address 8 bytes above stack pointer.
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
+        .addReg(SystemZ::R14D)
+        .addReg(SystemZ::R15D)
+        .addImm(8)
+        .addReg(0);
+
+    // Call mcount (Regmask 0 to ensure this will not be moved by the
+    // scheduler.).
+    const uint32_t Mask = 0;
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL))
+        .addGlobalAddress(MF.getFunction().getParent()->getFunction("mcount"))
+        .addRegMask(&Mask);
+
+    // Reload return address drom 8 bytes above stack pointer.
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LG))
+        .addReg(SystemZ::R14D)
+        .addReg(SystemZ::R15D)
+        .addImm(8)
+        .addReg(0);
+  }
+
   // The current offset of the stack pointer from the CFA.
   int64_t SPOffsetFromCFA = -SystemZMC::ELFCFAOffsetFromInitialSP;
 
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index d47f1b4253b54..afe806bbdc416 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -63,6 +63,12 @@ static void insertCall(Function &CurFn, StringRef Func,
                                   false));
       CallInst *Call = CallInst::Create(Fn, RetAddr, "", InsertionPt);
       Call->setDebugLoc(DL);
+    } else if (TargetTriple.isSystemZ()) {
+      M.getOrInsertFunction(Func, Type::getVoidTy(C));
+      // skip insertion for `mcount` on SystemZ. This will be handled later in
+      // `emitPrologue`. Add custom attribute to denote this.
+      CurFn.addFnAttr(
+          llvm::Attribute::get(C, "systemz-backend", "insert-mcount"));
     } else {
       FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
       CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
diff --git a/llvm/test/CodeGen/SystemZ/mcount.ll b/llvm/test/CodeGen/SystemZ/mcount.ll
new file mode 100644
index 0000000000000..01bd34548f125
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/mcount.ll
@@ -0,0 +1,35 @@
+; Test proper insertion of mcount instrumentation
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -o - | FileCheck %s
+;
+; CHECK: # %bb.0:
+; CHECK-NEXT: stg %r14, 8(%r15)
+; CHECK-NEXT: brasl %r14, mcount@PLT
+; CHECK-NEXT: lg %r14, 8(%r15)
+define dso_local signext i32 @fib(i32 noundef signext %n) #0 {
+entry:
+  %n.addr = alloca i32, align 4
+  store i32 %n, ptr %n.addr, align 4
+  %0 = load i32, ptr %n.addr, align 4
+  %cmp = icmp sle i32 %0, 1
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %1 = load i32, ptr %n.addr, align 4
+  %sub = sub nsw i32 %1, 1
+  %call = call signext i32 @fib(i32 noundef signext %sub)
+  %2 = load i32, ptr %n.addr, align 4
+  %sub1 = sub nsw i32 %2, 2
+  %call2 = call signext i32 @fib(i32 noundef signext %sub1)
+  %add = add nsw i32 %call, %call2
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 1, %cond.true ], [ %add, %cond.false ]
+  ret i32 %cond
+}
+
+attributes #0 = { "instrument-function-entry-inlined"="mcount" }

>From 77f2ba040f3be18e94f8c43e5b14b63ffbfd789e Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Mon, 28 Apr 2025 13:08:18 +0200
Subject: [PATCH 2/2] Implement review suggestions

---
 .../Target/SystemZ/SystemZFrameLowering.cpp   | 21 ++++++++++++-------
 .../Utils/EntryExitInstrumenter.cpp           |  3 +--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index c99faa45d7dc6..d4b81c4fbdfd2 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
@@ -558,10 +559,10 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
-
   // Add mcount instrumentation if necessary.
-  if (MF.getFunction().getFnAttribute("systemz-backend").getValueAsString() ==
-      "insert-mcount") {
+  if (MF.getFunction()
+          .getFnAttribute("systemz-instrument-function-entry")
+          .getValueAsString() == "mcount") {
 
     // Store return address 8 bytes above stack pointer.
     BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
@@ -570,12 +571,16 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
         .addImm(8)
         .addReg(0);
 
-    // Call mcount (Regmask 0 to ensure this will not be moved by the
-    // scheduler.).
-    const uint32_t Mask = 0;
+    // Call mcount (Regmask from CC AnyReg since mcount preserves all normal
+    // argument registers).
+    FunctionCallee FC = MF.getFunction().getParent()->getOrInsertFunction(
+        "mcount", Type::getVoidTy(MF.getFunction().getContext()));
+    const uint32_t *Mask = MF.getSubtarget<SystemZSubtarget>()
+                               .getSpecialRegisters()
+                               ->getCallPreservedMask(MF, CallingConv::AnyReg);
     BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL))
-        .addGlobalAddress(MF.getFunction().getParent()->getFunction("mcount"))
-        .addRegMask(&Mask);
+        .addGlobalAddress(dyn_cast<Function>(FC.getCallee()))
+        .addRegMask(Mask);
 
     // Reload return address drom 8 bytes above stack pointer.
     BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LG))
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index afe806bbdc416..92ff65c05cb91 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -64,11 +64,10 @@ static void insertCall(Function &CurFn, StringRef Func,
       CallInst *Call = CallInst::Create(Fn, RetAddr, "", InsertionPt);
       Call->setDebugLoc(DL);
     } else if (TargetTriple.isSystemZ()) {
-      M.getOrInsertFunction(Func, Type::getVoidTy(C));
       // skip insertion for `mcount` on SystemZ. This will be handled later in
       // `emitPrologue`. Add custom attribute to denote this.
       CurFn.addFnAttr(
-          llvm::Attribute::get(C, "systemz-backend", "insert-mcount"));
+          llvm::Attribute::get(C, "systemz-instrument-function-entry", Func));
     } else {
       FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
       CallInst *Call = CallInst::Create(Fn, "", InsertionPt);



More information about the llvm-commits mailing list