[llvm] [SystemZ] Eliminate call sequence instructions early. (PR #77812)

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 06:30:25 PDT 2024


https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/77812

>From 1245999bdc5a3f9a3868be05b0047593febad7b1 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 10 Jan 2024 20:44:22 -0600
Subject: [PATCH 1/6] Try in common code. In SystemZ only. Try removing
 callseq instructions. was 741b28ae

---
 .../Target/SystemZ/SystemZISelLowering.cpp    | 22 +++++++++++++++++++
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |  2 ++
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  |  2 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td   |  7 +++---
 llvm/test/CodeGen/SystemZ/call-zos-vararg.ll  |  5 ++++-
 llvm/test/CodeGen/SystemZ/cond-move-04.mir    |  2 --
 llvm/test/CodeGen/SystemZ/cond-move-08.mir    |  2 --
 .../SystemZ/cond-move-regalloc-hints-02.mir   |  2 --
 .../SystemZ/cond-move-regalloc-hints.mir      |  6 -----
 llvm/test/CodeGen/SystemZ/frame-28.mir        |  2 --
 llvm/test/CodeGen/SystemZ/swifterror.ll       |  8 +++----
 .../vector-constrained-fp-intrinsics.ll       | 16 +++++++-------
 12 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index da4bcd7f0c66ed..7d3cc831a64ebc 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8197,6 +8197,24 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
   MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
 }
 
+MachineBasicBlock *
+SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const {
+  // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
+  // remove these nodes. Given that these nodes start out as a glued sequence
+  // it seems best to remove them here after instruction selection and
+  // scheduling.  NB: MIR testing does not work (yet) for call frames with
+  // this.
+  MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
+  uint32_t NumBytes = MI.getOperand(0).getImm();
+  if (NumBytes > MFI.getMaxCallFrameSize())
+    MFI.setMaxCallFrameSize(NumBytes);
+  MFI.setAdjustsStack(true);
+
+  MI.eraseFromParent();
+  return BB;
+}
+
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 MachineBasicBlock *
 SystemZTargetLowering::emitSelect(MachineInstr &MI,
@@ -9400,6 +9418,10 @@ getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
+  case SystemZ::ADJCALLSTACKDOWN:
+  case SystemZ::ADJCALLSTACKUP:
+    return emitAdjCallStack(MI, MBB);
+
   case SystemZ::Select32:
   case SystemZ::Select64:
   case SystemZ::Select128:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4943c5cb703c33..7140287a886ccf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -760,6 +760,8 @@ class SystemZTargetLowering : public TargetLowering {
                                   MachineBasicBlock *Target) const;
 
   // Implement EmitInstrWithCustomInserter for individual operation types.
+  MachineBasicBlock *emitAdjCallStack(MachineInstr &MI,
+                                      MachineBasicBlock *BB) const;
   MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned StoreOpcode, unsigned STOCOpcode,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2a6dce863c28f1..950548abcfa92c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
 void SystemZInstrInfo::anchor() {}
 
 SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
-    : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    : SystemZGenInstrInfo(-1, -1),
       RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
       STI(sti) {}
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 96ea65b6c3d881..04a9467ec9a5f9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -13,9 +13,10 @@ def IsTargetELF           : Predicate<"Subtarget->isTargetELF()">;
 // Stack allocation
 //===----------------------------------------------------------------------===//
 
-// The callseq_start node requires the hasSideEffects flag, even though these
-// instructions are noops on SystemZ.
-let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
+// These pseudos are removed after instruction selection while updating the
+// values of MaxcallFrameSize and AdjustsStack which are needed during frame
+// lowering.  The callseq_start node requires the hasSideEffects flag.
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
   def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
                                 [(callseq_start timm:$amt1, timm:$amt2)]>;
   def ADJCALLSTACKUP   : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
index 8290dbfe23104f..4637ff2a1b65b5 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -88,13 +88,16 @@ entry:
   ret i64 %retval
 }
 
+; TODO: Unfortunately the lgdr is scheduled below the COPY from $r1d, causing
+; an overlap and thus an extra copy.
 ; CHECK-LABEL: call_vararg_both0:
 ; CHECK:         stmg 6, 7, 1872(4)
 ; CHECK-NEXT:    aghi 4, -192
 ; CHECK-NEXT:    lg 6, 40(5)
 ; CHECK-NEXT:    lg 5, 32(5)
+; CHECK-NEXT:    lgdr 0, 0
 ; CHECK-NEXT:    lgr 2, 1
-; CHECK-NEXT:    lgdr 1, 0
+; CHECK-NEXT:    lgr 1, 0
 ; CHECK-NEXT:    basr 7, 6
 ; CHECK-NEXT:    bcr 0, 0
 ; CHECK-NEXT:    lg 7, 2072(4)
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-04.mir b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
index 23fd2739698a40..ab4a14cfaee87b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-04.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
@@ -65,12 +65,10 @@ body:             |
     CHIMux %3, 0, implicit-def $cc
     %0 = LOCRMux undef %0, %5, 14, 6, implicit $cc
     %0 = LOCRMux %0, %2, 14, 6, implicit killed $cc
-    ADJCALLSTACKDOWN 0, 0
     %7 = LGFR %0
     $r3d = LGHI 0
     $r4d = COPY %7
     CallBRASL @foo, undef $r2d, killed $r3d, killed $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d
-    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
index 64c6d069799282..2ea67dcce067bc 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
@@ -155,9 +155,7 @@ body:             |
     J %bb.4
 
   bb.4.bb33:
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
-    ADJCALLSTACKUP 0, 0
     STRL %4, @globvar :: (store (s32) into @globvar)
     CLFIMux undef %23:grx32bit, 1, implicit-def $cc
     %25:grx32bit = LHIMux 0
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
index 2701a1dc034a22..8a7929c9eb2c31 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
@@ -45,11 +45,9 @@ body:             |
     %11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc
     CHIMux %6, 2, implicit-def $cc
     %0:gr32bit = SELRMux %11, %5, 14, 8, implicit killed $cc
-    ADJCALLSTACKDOWN 0, 0
     %10:gr64bit = LGFR %0
     $r2d = COPY %10
     CallBRASL @foo, killed $r2d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
-    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
index c98ffda8372721..009fd6ce82679b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
@@ -200,18 +200,12 @@ body:             |
   
     %32:gr64bit = COPY $r3d
     %0:gr64bit = COPY $r2d
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %1:addr64bit = COPY $r2d
-    ADJCALLSTACKUP 0, 0
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %2:addr64bit = COPY $r2d
-    ADJCALLSTACKUP 0, 0
     %3:gr32bit = AHIMuxK %0.subreg_l32, -1, implicit-def dead $cc
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
-    ADJCALLSTACKUP 0, 0
     %55:gr32bit = AHIMuxK %0.subreg_l32, 3, implicit-def dead $cc
     %56:addr64bit = LGHI 0
     %57:gr64bit = COPY %0
diff --git a/llvm/test/CodeGen/SystemZ/frame-28.mir b/llvm/test/CodeGen/SystemZ/frame-28.mir
index 13337dba6ec53f..254b8a2cf2461b 100644
--- a/llvm/test/CodeGen/SystemZ/frame-28.mir
+++ b/llvm/test/CodeGen/SystemZ/frame-28.mir
@@ -179,9 +179,7 @@ body:             |
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.1, 0, $noreg
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
-    ADJCALLSTACKUP 0, 0
     $f17d = IMPLICIT_DEF
     VST64 renamable $f17d, %stack.1, 0, $noreg
     Return
diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index 3ea29f1d830ec6..1b18287cac1468 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -30,8 +30,8 @@ entry:
 define float @caller(ptr %error_ref) {
 ; CHECK-LABEL: caller:
 ; Make a copy of error_ref because r2 is getting clobbered
-; CHECK: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
+; CHECK-DAG: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: %r2, %r9
 ; CHECK: jlh
@@ -197,7 +197,7 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-LABEL: foo_sret:
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3
-; CHECK: lghi %r2, 16
+; CHECK-DAG: lghi %r2, 16
 ; CHECK: brasl %r14, malloc
 ; CHECK: mvi 8(%r2), 1
 ; CHECK: st %r[[REG2]], 4(%r[[REG1]])
@@ -280,7 +280,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3
 ; The first swifterror value:
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: ltgr %r2, %r9
 ; CHECK: jlh
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 69e1c2f4aa0af3..9d77744f18ca1a 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -1649,8 +1649,8 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI36_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f8
@@ -1707,14 +1707,14 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_1
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f8, %f0
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_2
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f9, %f0
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    ler %f2, %f9
@@ -1784,14 +1784,14 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    std %f0, 16(%r13)
@@ -1865,20 +1865,20 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_3
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f10, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f10

>From 1f34bf3d32c5d1e6bdc974c9577650c1c805258d Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 11:41:21 -0600
Subject: [PATCH 2/6] Remove eliminateCallFramePseudoInstr and MIR comment

---
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp    |  4 ++++
 llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 ----------------
 llvm/lib/Target/SystemZ/SystemZFrameLowering.h   |  3 ---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp  |  8 +++++---
 llvm/lib/Target/SystemZ/SystemZLongBranch.cpp    |  2 ++
 5 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 5696ae117d69f0..3991fce65f03e5 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -199,6 +199,10 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCInst LoweredMI;
   switch (MI->getOpcode()) {
+  case SystemZ::ADJCALLSTACKDOWN:
+  case SystemZ::ADJCALLSTACKUP:
+    return;
+
   case SystemZ::Return:
     LoweredMI = MCInstBuilder(SystemZ::BR)
       .addReg(SystemZ::R14D);
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 80c994a32ea96a..4897b37d8eb1ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,22 +66,6 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
   return std::make_unique<SystemZELFFrameLowering>();
 }
 
-MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MI) const {
-  switch (MI->getOpcode()) {
-  case SystemZ::ADJCALLSTACKDOWN:
-  case SystemZ::ADJCALLSTACKUP:
-    assert(hasReservedCallFrame(MF) &&
-           "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
-    return MBB.erase(MI);
-    break;
-
-  default:
-    llvm_unreachable("Unexpected call frame instruction");
-  }
-}
-
 namespace {
 struct SZFrameSortingObj {
   bool IsValid = false;     // True if we care about this Object.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 95f30e3c0d99c8..03ce8882c4de5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,9 +41,6 @@ class SystemZFrameLowering : public TargetFrameLowering {
   }
 
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-  MachineBasicBlock::iterator
-  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const override;
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7d3cc831a64ebc..071dc7aa21fa22 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8203,15 +8203,17 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
   // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
   // remove these nodes. Given that these nodes start out as a glued sequence
   // it seems best to remove them here after instruction selection and
-  // scheduling.  NB: MIR testing does not work (yet) for call frames with
-  // this.
+  // scheduling.
   MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
   uint32_t NumBytes = MI.getOperand(0).getImm();
   if (NumBytes > MFI.getMaxCallFrameSize())
     MFI.setMaxCallFrameSize(NumBytes);
   MFI.setAdjustsStack(true);
 
-  MI.eraseFromParent();
+  // TODO: MI should be erased. For now, keep it around as it seems to help
+  // scheduling around calls slightly in general (fix MachineScheduler).
+  MI.getOperand(0).setImm(0);
+
   return BB;
 }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 632218cc61eefe..9cc7e7ac8b76a4 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -218,6 +218,8 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI,
           // These do not have a size:
           MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
           MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER ||
+          MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
+          MI.getOpcode() == SystemZ::ADJCALLSTACKUP ||
           // These have a size that may be zero:
           MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
           MI.getOpcode() == SystemZ::PATCHPOINT) &&

>From 0e652a6010248a59d19ba4bed823b98a3f54b4ec Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 12:38:30 -0600
Subject: [PATCH 3/6] Remove in PostRA pseudos instead

---
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 4 ----
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  | 5 +++++
 llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 2 --
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3991fce65f03e5..5696ae117d69f0 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -199,10 +199,6 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCInst LoweredMI;
   switch (MI->getOpcode()) {
-  case SystemZ::ADJCALLSTACKDOWN:
-  case SystemZ::ADJCALLSTACKUP:
-    return;
-
   case SystemZ::Return:
     LoweredMI = MCInstBuilder(SystemZ::BR)
       .addReg(SystemZ::R14D);
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 950548abcfa92c..c3e563d0b5ecd2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1513,6 +1513,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     expandLoadStackGuard(&MI);
     return true;
 
+  case SystemZ::ADJCALLSTACKDOWN:
+  case SystemZ::ADJCALLSTACKUP:
+    MI.eraseFromParent();
+    return true;
+
   default:
     return false;
   }
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9cc7e7ac8b76a4..632218cc61eefe 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -218,8 +218,6 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI,
           // These do not have a size:
           MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
           MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER ||
-          MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
-          MI.getOpcode() == SystemZ::ADJCALLSTACKUP ||
           // These have a size that may be zero:
           MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
           MI.getOpcode() == SystemZ::PATCHPOINT) &&

>From 882bbf4a95adf2e9bf0111d245f0bf2fc1200e65 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 14:33:46 -0600
Subject: [PATCH 4/6] Just set to 0

---
 llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 ++++++++++++++++
 llvm/lib/Target/SystemZ/SystemZFrameLowering.h   |  3 +++
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp  |  8 +++++---
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp     |  7 +------
 4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 4897b37d8eb1ef..80c994a32ea96a 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,6 +66,22 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
   return std::make_unique<SystemZELFFrameLowering>();
 }
 
+MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MI) const {
+  switch (MI->getOpcode()) {
+  case SystemZ::ADJCALLSTACKDOWN:
+  case SystemZ::ADJCALLSTACKUP:
+    assert(hasReservedCallFrame(MF) &&
+           "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+    return MBB.erase(MI);
+    break;
+
+  default:
+    llvm_unreachable("Unexpected call frame instruction");
+  }
+}
+
 namespace {
 struct SZFrameSortingObj {
   bool IsValid = false;     // True if we care about this Object.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 03ce8882c4de5d..95f30e3c0d99c8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,6 +41,9 @@ class SystemZFrameLowering : public TargetFrameLowering {
   }
 
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const override;
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 071dc7aa21fa22..af59a55b091ee3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8202,7 +8202,7 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
   // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
   // remove these nodes. Given that these nodes start out as a glued sequence
-  // it seems best to remove them here after instruction selection and
+  // it seems best to handle them here after instruction selection and
   // scheduling.
   MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
   uint32_t NumBytes = MI.getOperand(0).getImm();
@@ -8210,8 +8210,10 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
     MFI.setMaxCallFrameSize(NumBytes);
   MFI.setAdjustsStack(true);
 
-  // TODO: MI should be erased. For now, keep it around as it seems to help
-  // scheduling around calls slightly in general (fix MachineScheduler).
+  // Set the NumBytes value to 0 to avoid problems of maintaining the call
+  // frame size across CFG edges.  TODO: MI could be erased, but it seems to
+  // help scheduling around calls slightly (fix MachineScheduler + handle the
+  // adjustsStack implication).
   MI.getOperand(0).setImm(0);
 
   return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c3e563d0b5ecd2..2a6dce863c28f1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
 void SystemZInstrInfo::anchor() {}
 
 SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
-    : SystemZGenInstrInfo(-1, -1),
+    : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
       RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
       STI(sti) {}
 
@@ -1513,11 +1513,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     expandLoadStackGuard(&MI);
     return true;
 
-  case SystemZ::ADJCALLSTACKDOWN:
-  case SystemZ::ADJCALLSTACKUP:
-    MI.eraseFromParent();
-    return true;
-
   default:
     return false;
   }

>From 61c18df09b822b53aa3cf7b8ce022bf7d2a4ca89 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Mon, 15 Jan 2024 13:29:28 -0600
Subject: [PATCH 5/6] Remove mapping, and remove MIs during PEI.

---
 .../Target/SystemZ/SystemZFrameLowering.cpp   | 30 +++++++++----------
 .../lib/Target/SystemZ/SystemZFrameLowering.h |  3 --
 .../Target/SystemZ/SystemZISelLowering.cpp    | 12 ++++----
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  |  2 +-
 llvm/test/CodeGen/SystemZ/call-zos-vararg.ll  |  5 +---
 llvm/test/CodeGen/SystemZ/cond-move-04.mir    |  2 ++
 llvm/test/CodeGen/SystemZ/cond-move-08.mir    |  2 ++
 .../SystemZ/cond-move-regalloc-hints-02.mir   |  2 ++
 .../SystemZ/cond-move-regalloc-hints.mir      |  6 ++++
 llvm/test/CodeGen/SystemZ/frame-28.mir        |  2 ++
 llvm/test/CodeGen/SystemZ/swifterror.ll       |  8 ++---
 .../vector-constrained-fp-intrinsics.ll       | 16 +++++-----
 12 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 80c994a32ea96a..dc7e6589b48af2 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,22 +66,6 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
   return std::make_unique<SystemZELFFrameLowering>();
 }
 
-MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MI) const {
-  switch (MI->getOpcode()) {
-  case SystemZ::ADJCALLSTACKDOWN:
-  case SystemZ::ADJCALLSTACKUP:
-    assert(hasReservedCallFrame(MF) &&
-           "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
-    return MBB.erase(MI);
-    break;
-
-  default:
-    llvm_unreachable("Unexpected call frame instruction");
-  }
-}
-
 namespace {
 struct SZFrameSortingObj {
   bool IsValid = false;     // True if we care about this Object.
@@ -439,6 +423,16 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
+static void removeCallSeqPseudos(MachineFunction &MF) {
+  // TODO: These could have been removed in finalize isel already as they are
+  // not mapped as frame instructions. See comment in emitAdjCallStack().
+  for (auto &MBB : MF)
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+      if (MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
+          MI.getOpcode() == SystemZ::ADJCALLSTACKUP)
+        MI.eraseFromParent();
+}
+
 void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
   MachineFrameInfo &MFFrame = MF.getFrameInfo();
@@ -480,6 +474,8 @@ void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
       ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D)
     for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D))
       MO.setIsKill(false);
+
+  removeCallSeqPseudos(MF);
 }
 
 // Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
@@ -1471,6 +1467,8 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
   // with existing compilers.
   MFFrame.setMaxCallFrameSize(
       std::max(64U, (unsigned)alignTo(MFFrame.getMaxCallFrameSize(), 64)));
+
+  removeCallSeqPseudos(MF);
 }
 
 // Determines the size of the frame, and creates the deferred spill objects.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 95f30e3c0d99c8..03ce8882c4de5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,9 +41,6 @@ class SystemZFrameLowering : public TargetFrameLowering {
   }
 
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-  MachineBasicBlock::iterator
-  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const override;
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index af59a55b091ee3..2e8fab8dbec742 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8200,20 +8200,22 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
 MachineBasicBlock *
 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
+  MachineFunction &MF = *BB->getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
+  assert(TFL->hasReservedCallFrame(MF) &&
+         "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
   // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
   // remove these nodes. Given that these nodes start out as a glued sequence
   // it seems best to handle them here after instruction selection and
   // scheduling.
-  MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
   uint32_t NumBytes = MI.getOperand(0).getImm();
   if (NumBytes > MFI.getMaxCallFrameSize())
     MFI.setMaxCallFrameSize(NumBytes);
   MFI.setAdjustsStack(true);
 
-  // Set the NumBytes value to 0 to avoid problems of maintaining the call
-  // frame size across CFG edges.  TODO: MI could be erased, but it seems to
-  // help scheduling around calls slightly (fix MachineScheduler + handle the
-  // adjustsStack implication).
+  // TODO: MI should be erased. For now, keep it around as it seems to help
+  // eliminate COPYs around calls slightly in general (fix MachineScheduler?).
   MI.getOperand(0).setImm(0);
 
   return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2a6dce863c28f1..950548abcfa92c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
 void SystemZInstrInfo::anchor() {}
 
 SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
-    : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    : SystemZGenInstrInfo(-1, -1),
       RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
       STI(sti) {}
 
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
index 4637ff2a1b65b5..8290dbfe23104f 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -88,16 +88,13 @@ entry:
   ret i64 %retval
 }
 
-; TODO: Unfortunately the lgdr is scheduled below the COPY from $r1d, causing
-; an overlap and thus an extra copy.
 ; CHECK-LABEL: call_vararg_both0:
 ; CHECK:         stmg 6, 7, 1872(4)
 ; CHECK-NEXT:    aghi 4, -192
 ; CHECK-NEXT:    lg 6, 40(5)
 ; CHECK-NEXT:    lg 5, 32(5)
-; CHECK-NEXT:    lgdr 0, 0
 ; CHECK-NEXT:    lgr 2, 1
-; CHECK-NEXT:    lgr 1, 0
+; CHECK-NEXT:    lgdr 1, 0
 ; CHECK-NEXT:    basr 7, 6
 ; CHECK-NEXT:    bcr 0, 0
 ; CHECK-NEXT:    lg 7, 2072(4)
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-04.mir b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
index ab4a14cfaee87b..23fd2739698a40 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-04.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
@@ -65,10 +65,12 @@ body:             |
     CHIMux %3, 0, implicit-def $cc
     %0 = LOCRMux undef %0, %5, 14, 6, implicit $cc
     %0 = LOCRMux %0, %2, 14, 6, implicit killed $cc
+    ADJCALLSTACKDOWN 0, 0
     %7 = LGFR %0
     $r3d = LGHI 0
     $r4d = COPY %7
     CallBRASL @foo, undef $r2d, killed $r3d, killed $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d
+    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
index 2ea67dcce067bc..64c6d069799282 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
@@ -155,7 +155,9 @@ body:             |
     J %bb.4
 
   bb.4.bb33:
+    ADJCALLSTACKDOWN 0, 0
     CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
+    ADJCALLSTACKUP 0, 0
     STRL %4, @globvar :: (store (s32) into @globvar)
     CLFIMux undef %23:grx32bit, 1, implicit-def $cc
     %25:grx32bit = LHIMux 0
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
index 8a7929c9eb2c31..2701a1dc034a22 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
@@ -45,9 +45,11 @@ body:             |
     %11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc
     CHIMux %6, 2, implicit-def $cc
     %0:gr32bit = SELRMux %11, %5, 14, 8, implicit killed $cc
+    ADJCALLSTACKDOWN 0, 0
     %10:gr64bit = LGFR %0
     $r2d = COPY %10
     CallBRASL @foo, killed $r2d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
+    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
index 009fd6ce82679b..c98ffda8372721 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
@@ -200,12 +200,18 @@ body:             |
   
     %32:gr64bit = COPY $r3d
     %0:gr64bit = COPY $r2d
+    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %1:addr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
+    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %2:addr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
     %3:gr32bit = AHIMuxK %0.subreg_l32, -1, implicit-def dead $cc
+    ADJCALLSTACKDOWN 0, 0
     CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
+    ADJCALLSTACKUP 0, 0
     %55:gr32bit = AHIMuxK %0.subreg_l32, 3, implicit-def dead $cc
     %56:addr64bit = LGHI 0
     %57:gr64bit = COPY %0
diff --git a/llvm/test/CodeGen/SystemZ/frame-28.mir b/llvm/test/CodeGen/SystemZ/frame-28.mir
index 254b8a2cf2461b..13337dba6ec53f 100644
--- a/llvm/test/CodeGen/SystemZ/frame-28.mir
+++ b/llvm/test/CodeGen/SystemZ/frame-28.mir
@@ -179,7 +179,9 @@ body:             |
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.1, 0, $noreg
+    ADJCALLSTACKDOWN 0, 0
     CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+    ADJCALLSTACKUP 0, 0
     $f17d = IMPLICIT_DEF
     VST64 renamable $f17d, %stack.1, 0, $noreg
     Return
diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index 1b18287cac1468..3ea29f1d830ec6 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -30,8 +30,8 @@ entry:
 define float @caller(ptr %error_ref) {
 ; CHECK-LABEL: caller:
 ; Make a copy of error_ref because r2 is getting clobbered
-; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK-DAG: lghi %r9, 0
+; CHECK: lgr %r[[REG1:[0-9]+]], %r2
+; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: %r2, %r9
 ; CHECK: jlh
@@ -197,7 +197,7 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-LABEL: foo_sret:
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3
-; CHECK-DAG: lghi %r2, 16
+; CHECK: lghi %r2, 16
 ; CHECK: brasl %r14, malloc
 ; CHECK: mvi 8(%r2), 1
 ; CHECK: st %r[[REG2]], 4(%r[[REG1]])
@@ -280,7 +280,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3
 ; The first swifterror value:
-; CHECK-DAG: lghi %r9, 0
+; CHECK: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: ltgr %r2, %r9
 ; CHECK: jlh
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 9d77744f18ca1a..69e1c2f4aa0af3 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -1649,8 +1649,8 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI36_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f8
@@ -1707,14 +1707,14 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_1
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_2
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    ler %f2, %f9
@@ -1784,14 +1784,14 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    std %f0, 16(%r13)
@@ -1865,20 +1865,20 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_3
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    lghi %r2, 3
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f10

>From a72a5531de185e8f1a524d0ea63908a34d6a81c8 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 27 Mar 2024 13:33:15 +0100
Subject: [PATCH 6/6] Commenting. Don't set AdjustsStack

---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 12 ++++++------
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td     |  5 ++---
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2e8fab8dbec742..196903fa4d3202 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8205,17 +8205,17 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
   auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
   assert(TFL->hasReservedCallFrame(MF) &&
          "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
-  // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
-  // remove these nodes. Given that these nodes start out as a glued sequence
-  // it seems best to handle them here after instruction selection and
-  // scheduling.
+  // Get the MaxCallFrameSize value and clear the NumBytes value to not
+  // confuse the verifier. Keep them around as scheduling barriers around
+  // call arguments even though they serve no further purpose as the call
+  // frame is statically reserved in the prolog.
   uint32_t NumBytes = MI.getOperand(0).getImm();
   if (NumBytes > MFI.getMaxCallFrameSize())
     MFI.setMaxCallFrameSize(NumBytes);
+  // Set AdjustsStack as this is *not* mapped as a frame instruction.
   MFI.setAdjustsStack(true);
 
-  // TODO: MI should be erased. For now, keep it around as it seems to help
-  // eliminate COPYs around calls slightly in general (fix MachineScheduler?).
+  // TODO: Fix machine scheduler and erase MI instead?
   MI.getOperand(0).setImm(0);
 
   return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 04a9467ec9a5f9..7f3a143aad9709 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -13,9 +13,8 @@ def IsTargetELF           : Predicate<"Subtarget->isTargetELF()">;
 // Stack allocation
 //===----------------------------------------------------------------------===//
 
-// These pseudos are removed after instruction selection while updating the
-// values of MaxcallFrameSize and AdjustsStack which are needed during frame
-// lowering.  The callseq_start node requires the hasSideEffects flag.
+// These pseudos carry values needed to compute the MaxCallFrameSize of the
+// function.  The callseq_start node requires the hasSideEffects flag.
 let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
   def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
                                 [(callseq_start timm:$amt1, timm:$amt2)]>;



More information about the llvm-commits mailing list