[llvm] 16b7cc6 - [SystemZ] Eliminate call sequence instructions early. (#77812)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 10:26:42 PDT 2024


Author: Jonas Paulsson
Date: 2024-03-28T18:26:38+01:00
New Revision: 16b7cc69ef43110c9776212379ccc37d7d3263c0

URL: https://github.com/llvm/llvm-project/commit/16b7cc69ef43110c9776212379ccc37d7d3263c0
DIFF: https://github.com/llvm/llvm-project/commit/16b7cc69ef43110c9776212379ccc37d7d3263c0.diff

LOG: [SystemZ] Eliminate call sequence instructions early. (#77812)

On SystemZ, an outgoing argument area big enough for all calls in the
function is created once during the prolog, as opposed to adjusting the
stack around each call. The call-sequence instructions therefore serve no
purpose other than computing the maximum call frame size, which has so far
been done by PEI (Prolog/Epilog Insertion) but can just as well be done at
an earlier point.

This patch removes the mapping of the CallFrameSetupOpcode and
CallFrameDestroyOpcode and instead computes the MaxCallFrameSize
directly after instruction selection and then removes the ADJCALLSTACK
pseudos. This removes the confusing pseudos and also avoids the problem
of having to keep the call frame size accurate when creating new MBBs.

This fixes #76618, which exposed the need to maintain the call frame size
when splitting blocks (something that was not being done).

Added: 
    

Modified: 
    llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
    llvm/lib/Target/SystemZ/SystemZFrameLowering.h
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZInstrInfo.td
    llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
    llvm/test/CodeGen/SystemZ/cond-move-04.mir
    llvm/test/CodeGen/SystemZ/cond-move-08.mir
    llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
    llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
    llvm/test/CodeGen/SystemZ/frame-28.mir
    llvm/test/CodeGen/SystemZ/swifterror.ll
    llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 80c994a32ea96a..4897b37d8eb1ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,22 +66,6 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
   return std::make_unique<SystemZELFFrameLowering>();
 }
 
-MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MI) const {
-  switch (MI->getOpcode()) {
-  case SystemZ::ADJCALLSTACKDOWN:
-  case SystemZ::ADJCALLSTACKUP:
-    assert(hasReservedCallFrame(MF) &&
-           "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
-    return MBB.erase(MI);
-    break;
-
-  default:
-    llvm_unreachable("Unexpected call frame instruction");
-  }
-}
-
 namespace {
 struct SZFrameSortingObj {
   bool IsValid = false;     // True if we care about this Object.

diff  --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 95f30e3c0d99c8..03ce8882c4de5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,9 +41,6 @@ class SystemZFrameLowering : public TargetFrameLowering {
   }
 
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-  MachineBasicBlock::iterator
-  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const override;
 };
 
 class SystemZELFFrameLowering : public SystemZFrameLowering {

diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6496fe766101fc..ce4f5ebcd7c0d3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8173,6 +8173,26 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
   MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
 }
 
+MachineBasicBlock *
+SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const {
+  MachineFunction &MF = *BB->getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
+  assert(TFL->hasReservedCallFrame(MF) &&
+         "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+  // Get the MaxCallFrameSize value and erase MI since it serves no further
+  // purpose as the call frame is statically reserved in the prolog. Set
+  // AdjustsStack as MI is *not* mapped as a frame instruction.
+  uint32_t NumBytes = MI.getOperand(0).getImm();
+  if (NumBytes > MFI.getMaxCallFrameSize())
+    MFI.setMaxCallFrameSize(NumBytes);
+  MFI.setAdjustsStack(true);
+
+  MI.eraseFromParent();
+  return BB;
+}
+
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 MachineBasicBlock *
 SystemZTargetLowering::emitSelect(MachineInstr &MI,
@@ -9376,6 +9396,10 @@ getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
+  case SystemZ::ADJCALLSTACKDOWN:
+  case SystemZ::ADJCALLSTACKUP:
+    return emitAdjCallStack(MI, MBB);
+
   case SystemZ::Select32:
   case SystemZ::Select64:
   case SystemZ::Select128:

diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4943c5cb703c33..7140287a886ccf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -760,6 +760,8 @@ class SystemZTargetLowering : public TargetLowering {
                                   MachineBasicBlock *Target) const;
 
   // Implement EmitInstrWithCustomInserter for individual operation types.
+  MachineBasicBlock *emitAdjCallStack(MachineInstr &MI,
+                                      MachineBasicBlock *BB) const;
   MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned StoreOpcode, unsigned STOCOpcode,

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2a6dce863c28f1..950548abcfa92c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
 void SystemZInstrInfo::anchor() {}
 
 SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
-    : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    : SystemZGenInstrInfo(-1, -1),
       RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
       STI(sti) {}
 

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 96ea65b6c3d881..7f3a143aad9709 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -13,9 +13,9 @@ def IsTargetELF           : Predicate<"Subtarget->isTargetELF()">;
 // Stack allocation
 //===----------------------------------------------------------------------===//
 
-// The callseq_start node requires the hasSideEffects flag, even though these
-// instructions are noops on SystemZ.
-let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
+// These pseudos carry values needed to compute the MaxCallFrameSize of the
+// function.  The callseq_start node requires the hasSideEffects flag.
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
   def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
                                 [(callseq_start timm:$amt1, timm:$amt2)]>;
   def ADJCALLSTACKUP   : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),

diff  --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
index 8290dbfe23104f..81aedc1a1d7f2d 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -88,13 +88,15 @@ entry:
   ret i64 %retval
 }
 
+;; TODO: The extra COPY after LGDR is unnecessary (machine-scheduler introduces the overlap).
 ; CHECK-LABEL: call_vararg_both0:
 ; CHECK:         stmg 6, 7, 1872(4)
 ; CHECK-NEXT:    aghi 4, -192
 ; CHECK-NEXT:    lg 6, 40(5)
 ; CHECK-NEXT:    lg 5, 32(5)
+; CHECK-NEXT:    lgdr 0, 0
 ; CHECK-NEXT:    lgr 2, 1
-; CHECK-NEXT:    lgdr 1, 0
+; CHECK-NEXT:    lgr 1, 0
 ; CHECK-NEXT:    basr 7, 6
 ; CHECK-NEXT:    bcr 0, 0
 ; CHECK-NEXT:    lg 7, 2072(4)

diff  --git a/llvm/test/CodeGen/SystemZ/cond-move-04.mir b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
index 23fd2739698a40..ab4a14cfaee87b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-04.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
@@ -65,12 +65,10 @@ body:             |
     CHIMux %3, 0, implicit-def $cc
     %0 = LOCRMux undef %0, %5, 14, 6, implicit $cc
     %0 = LOCRMux %0, %2, 14, 6, implicit killed $cc
-    ADJCALLSTACKDOWN 0, 0
     %7 = LGFR %0
     $r3d = LGHI 0
     $r4d = COPY %7
     CallBRASL @foo, undef $r2d, killed $r3d, killed $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d
-    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...

diff  --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
index 64c6d069799282..2ea67dcce067bc 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
@@ -155,9 +155,7 @@ body:             |
     J %bb.4
 
   bb.4.bb33:
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
-    ADJCALLSTACKUP 0, 0
     STRL %4, @globvar :: (store (s32) into @globvar)
     CLFIMux undef %23:grx32bit, 1, implicit-def $cc
     %25:grx32bit = LHIMux 0

diff  --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
index 2701a1dc034a22..8a7929c9eb2c31 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
@@ -45,11 +45,9 @@ body:             |
     %11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc
     CHIMux %6, 2, implicit-def $cc
     %0:gr32bit = SELRMux %11, %5, 14, 8, implicit killed $cc
-    ADJCALLSTACKDOWN 0, 0
     %10:gr64bit = LGFR %0
     $r2d = COPY %10
     CallBRASL @foo, killed $r2d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
-    ADJCALLSTACKUP 0, 0
     J %bb.1
 
 ...

diff  --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
index c98ffda8372721..009fd6ce82679b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
@@ -200,18 +200,12 @@ body:             |
   
     %32:gr64bit = COPY $r3d
     %0:gr64bit = COPY $r2d
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %1:addr64bit = COPY $r2d
-    ADJCALLSTACKUP 0, 0
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
     %2:addr64bit = COPY $r2d
-    ADJCALLSTACKUP 0, 0
     %3:gr32bit = AHIMuxK %0.subreg_l32, -1, implicit-def dead $cc
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
-    ADJCALLSTACKUP 0, 0
     %55:gr32bit = AHIMuxK %0.subreg_l32, 3, implicit-def dead $cc
     %56:addr64bit = LGHI 0
     %57:gr64bit = COPY %0

diff  --git a/llvm/test/CodeGen/SystemZ/frame-28.mir b/llvm/test/CodeGen/SystemZ/frame-28.mir
index 13337dba6ec53f..254b8a2cf2461b 100644
--- a/llvm/test/CodeGen/SystemZ/frame-28.mir
+++ b/llvm/test/CodeGen/SystemZ/frame-28.mir
@@ -179,9 +179,7 @@ body:             |
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.0, 0, $noreg
     VST64 renamable $f16d, %stack.1, 0, $noreg
-    ADJCALLSTACKDOWN 0, 0
     CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
-    ADJCALLSTACKUP 0, 0
     $f17d = IMPLICIT_DEF
     VST64 renamable $f17d, %stack.1, 0, $noreg
     Return

diff  --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index 3ea29f1d830ec6..1b18287cac1468 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -30,8 +30,8 @@ entry:
 define float @caller(ptr %error_ref) {
 ; CHECK-LABEL: caller:
 ; Make a copy of error_ref because r2 is getting clobbered
-; CHECK: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
+; CHECK-DAG: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: %r2, %r9
 ; CHECK: jlh
@@ -197,7 +197,7 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-LABEL: foo_sret:
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3
-; CHECK: lghi %r2, 16
+; CHECK-DAG: lghi %r2, 16
 ; CHECK: brasl %r14, malloc
 ; CHECK: mvi 8(%r2), 1
 ; CHECK: st %r[[REG2]], 4(%r[[REG1]])
@@ -280,7 +280,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
 ; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3
 ; The first swifterror value:
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lghi %r9, 0
 ; CHECK: brasl %r14, foo
 ; CHECK: ltgr %r2, %r9
 ; CHECK: jlh

diff  --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 69e1c2f4aa0af3..9d77744f18ca1a 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -1649,8 +1649,8 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI36_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f8
@@ -1707,14 +1707,14 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_1
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f8, %f0
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI37_2
 ; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f9, %f0
 ; S390X-NEXT:    ler %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powisf2@PLT
 ; S390X-NEXT:    ler %f2, %f9
@@ -1784,14 +1784,14 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI38_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    std %f0, 16(%r13)
@@ -1865,20 +1865,20 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_1
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f8, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_2
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f9, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    larl %r1, .LCPI39_3
 ; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
 ; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f10, %f0
 ; S390X-NEXT:    ldr %f0, %f1
 ; S390X-NEXT:    brasl %r14, __powidf2@PLT
 ; S390X-NEXT:    ldr %f2, %f10


        


More information about the llvm-commits mailing list