[llvm] [SystemZ] Eliminate call sequence instructions early. (PR #77812)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 06:31:18 PDT 2024
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/77812
>From 1245999bdc5a3f9a3868be05b0047593febad7b1 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 10 Jan 2024 20:44:22 -0600
Subject: [PATCH 1/6] Try in common code. In SystemZ only. Try removing
callseq instructions. was 741b28ae
---
.../Target/SystemZ/SystemZISelLowering.cpp | 22 +++++++++++++++++++
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 ++
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 +-
llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 7 +++---
llvm/test/CodeGen/SystemZ/call-zos-vararg.ll | 5 ++++-
llvm/test/CodeGen/SystemZ/cond-move-04.mir | 2 --
llvm/test/CodeGen/SystemZ/cond-move-08.mir | 2 --
.../SystemZ/cond-move-regalloc-hints-02.mir | 2 --
.../SystemZ/cond-move-regalloc-hints.mir | 6 -----
llvm/test/CodeGen/SystemZ/frame-28.mir | 2 --
llvm/test/CodeGen/SystemZ/swifterror.ll | 8 +++----
.../vector-constrained-fp-intrinsics.ll | 16 +++++++-------
12 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index da4bcd7f0c66ed..7d3cc831a64ebc 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8197,6 +8197,24 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}
+MachineBasicBlock *
+SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
+ // remove these nodes. Given that these nodes start out as a glued sequence
+ // it seems best to remove them here after instruction selection and
+ // scheduling. NB: MIR testing does not work (yet) for call frames with
+ // this.
+ MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
+ uint32_t NumBytes = MI.getOperand(0).getImm();
+ if (NumBytes > MFI.getMaxCallFrameSize())
+ MFI.setMaxCallFrameSize(NumBytes);
+ MFI.setAdjustsStack(true);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
@@ -9400,6 +9418,10 @@ getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ return emitAdjCallStack(MI, MBB);
+
case SystemZ::Select32:
case SystemZ::Select64:
case SystemZ::Select128:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4943c5cb703c33..7140287a886ccf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -760,6 +760,8 @@ class SystemZTargetLowering : public TargetLowering {
MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
+ MachineBasicBlock *emitAdjCallStack(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2a6dce863c28f1..950548abcfa92c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
void SystemZInstrInfo::anchor() {}
SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
- : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+ : SystemZGenInstrInfo(-1, -1),
RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
STI(sti) {}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 96ea65b6c3d881..04a9467ec9a5f9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -13,9 +13,10 @@ def IsTargetELF : Predicate<"Subtarget->isTargetELF()">;
// Stack allocation
//===----------------------------------------------------------------------===//
-// The callseq_start node requires the hasSideEffects flag, even though these
-// instructions are noops on SystemZ.
-let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
+// These pseudos are removed after instruction selection while updating the
+// values of MaxCallFrameSize and AdjustsStack which are needed during frame
+// lowering. The callseq_start node requires the hasSideEffects flag.
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
index 8290dbfe23104f..4637ff2a1b65b5 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -88,13 +88,16 @@ entry:
ret i64 %retval
}
+; TODO: Unfortunately the lgdr is scheduled below the COPY from $r1d, causing
+; an overlap and thus an extra copy.
; CHECK-LABEL: call_vararg_both0:
; CHECK: stmg 6, 7, 1872(4)
; CHECK-NEXT: aghi 4, -192
; CHECK-NEXT: lg 6, 40(5)
; CHECK-NEXT: lg 5, 32(5)
+; CHECK-NEXT: lgdr 0, 0
; CHECK-NEXT: lgr 2, 1
-; CHECK-NEXT: lgdr 1, 0
+; CHECK-NEXT: lgr 1, 0
; CHECK-NEXT: basr 7, 6
; CHECK-NEXT: bcr 0, 0
; CHECK-NEXT: lg 7, 2072(4)
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-04.mir b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
index 23fd2739698a40..ab4a14cfaee87b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-04.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
@@ -65,12 +65,10 @@ body: |
CHIMux %3, 0, implicit-def $cc
%0 = LOCRMux undef %0, %5, 14, 6, implicit $cc
%0 = LOCRMux %0, %2, 14, 6, implicit killed $cc
- ADJCALLSTACKDOWN 0, 0
%7 = LGFR %0
$r3d = LGHI 0
$r4d = COPY %7
CallBRASL @foo, undef $r2d, killed $r3d, killed $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d
- ADJCALLSTACKUP 0, 0
J %bb.1
...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
index 64c6d069799282..2ea67dcce067bc 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
@@ -155,9 +155,7 @@ body: |
J %bb.4
bb.4.bb33:
- ADJCALLSTACKDOWN 0, 0
CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
- ADJCALLSTACKUP 0, 0
STRL %4, @globvar :: (store (s32) into @globvar)
CLFIMux undef %23:grx32bit, 1, implicit-def $cc
%25:grx32bit = LHIMux 0
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
index 2701a1dc034a22..8a7929c9eb2c31 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
@@ -45,11 +45,9 @@ body: |
%11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc
CHIMux %6, 2, implicit-def $cc
%0:gr32bit = SELRMux %11, %5, 14, 8, implicit killed $cc
- ADJCALLSTACKDOWN 0, 0
%10:gr64bit = LGFR %0
$r2d = COPY %10
CallBRASL @foo, killed $r2d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
- ADJCALLSTACKUP 0, 0
J %bb.1
...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
index c98ffda8372721..009fd6ce82679b 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
@@ -200,18 +200,12 @@ body: |
%32:gr64bit = COPY $r3d
%0:gr64bit = COPY $r2d
- ADJCALLSTACKDOWN 0, 0
CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
%1:addr64bit = COPY $r2d
- ADJCALLSTACKUP 0, 0
- ADJCALLSTACKDOWN 0, 0
CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
%2:addr64bit = COPY $r2d
- ADJCALLSTACKUP 0, 0
%3:gr32bit = AHIMuxK %0.subreg_l32, -1, implicit-def dead $cc
- ADJCALLSTACKDOWN 0, 0
CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
- ADJCALLSTACKUP 0, 0
%55:gr32bit = AHIMuxK %0.subreg_l32, 3, implicit-def dead $cc
%56:addr64bit = LGHI 0
%57:gr64bit = COPY %0
diff --git a/llvm/test/CodeGen/SystemZ/frame-28.mir b/llvm/test/CodeGen/SystemZ/frame-28.mir
index 13337dba6ec53f..254b8a2cf2461b 100644
--- a/llvm/test/CodeGen/SystemZ/frame-28.mir
+++ b/llvm/test/CodeGen/SystemZ/frame-28.mir
@@ -179,9 +179,7 @@ body: |
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
- ADJCALLSTACKDOWN 0, 0
CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
- ADJCALLSTACKUP 0, 0
$f17d = IMPLICIT_DEF
VST64 renamable $f17d, %stack.1, 0, $noreg
Return
diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index 3ea29f1d830ec6..1b18287cac1468 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -30,8 +30,8 @@ entry:
define float @caller(ptr %error_ref) {
; CHECK-LABEL: caller:
; Make a copy of error_ref because r2 is getting clobbered
-; CHECK: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
+; CHECK-DAG: lghi %r9, 0
; CHECK: brasl %r14, foo
; CHECK: %r2, %r9
; CHECK: jlh
@@ -197,7 +197,7 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
; CHECK-LABEL: foo_sret:
; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3
-; CHECK: lghi %r2, 16
+; CHECK-DAG: lghi %r2, 16
; CHECK: brasl %r14, malloc
; CHECK: mvi 8(%r2), 1
; CHECK: st %r[[REG2]], 4(%r[[REG1]])
@@ -280,7 +280,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3
; The first swifterror value:
-; CHECK: lghi %r9, 0
+; CHECK-DAG: lghi %r9, 0
; CHECK: brasl %r14, foo
; CHECK: ltgr %r2, %r9
; CHECK: jlh
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 69e1c2f4aa0af3..9d77744f18ca1a 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -1649,8 +1649,8 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI36_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: ldr %f2, %f8
@@ -1707,14 +1707,14 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: larl %r1, .LCPI37_1
; S390X-NEXT: le %f1, 0(%r1)
-; S390X-NEXT: ler %f8, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ler %f8, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: larl %r1, .LCPI37_2
; S390X-NEXT: le %f1, 0(%r1)
-; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ler %f9, %f0
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: ler %f2, %f9
@@ -1784,14 +1784,14 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI38_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI38_2
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f9, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f9, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: std %f0, 16(%r13)
@@ -1865,20 +1865,20 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f8, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_2
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f9, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f9, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_3
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: lghi %r2, 3
+; S390X-NEXT: ldr %f10, %f0
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: ldr %f2, %f10
>From 1f34bf3d32c5d1e6bdc974c9577650c1c805258d Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 11:41:21 -0600
Subject: [PATCH 2/6] Remove eliminateCallFramePseudoInstr and MIR comment
---
llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 4 ++++
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 ----------------
llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3 ---
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 8 +++++---
llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 2 ++
5 files changed, 11 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 5696ae117d69f0..3991fce65f03e5 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -199,6 +199,10 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ return;
+
case SystemZ::Return:
LoweredMI = MCInstBuilder(SystemZ::BR)
.addReg(SystemZ::R14D);
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 80c994a32ea96a..4897b37d8eb1ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,22 +66,6 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
return std::make_unique<SystemZELFFrameLowering>();
}
-MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- switch (MI->getOpcode()) {
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- assert(hasReservedCallFrame(MF) &&
- "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- return MBB.erase(MI);
- break;
-
- default:
- llvm_unreachable("Unexpected call frame instruction");
- }
-}
-
namespace {
struct SZFrameSortingObj {
bool IsValid = false; // True if we care about this Object.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 95f30e3c0d99c8..03ce8882c4de5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,9 +41,6 @@ class SystemZFrameLowering : public TargetFrameLowering {
}
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- MachineBasicBlock::iterator
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override;
};
class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7d3cc831a64ebc..071dc7aa21fa22 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8203,15 +8203,17 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
// Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
// remove these nodes. Given that these nodes start out as a glued sequence
// it seems best to remove them here after instruction selection and
- // scheduling. NB: MIR testing does not work (yet) for call frames with
- // this.
+ // scheduling.
MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
uint32_t NumBytes = MI.getOperand(0).getImm();
if (NumBytes > MFI.getMaxCallFrameSize())
MFI.setMaxCallFrameSize(NumBytes);
MFI.setAdjustsStack(true);
- MI.eraseFromParent();
+ // TODO: MI should be erased. For now, keep it around as it seems to help
+ // scheduling around calls slightly in general (fix MachineScheduler).
+ MI.getOperand(0).setImm(0);
+
return BB;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 632218cc61eefe..9cc7e7ac8b76a4 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -218,6 +218,8 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI,
// These do not have a size:
MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER ||
+ MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
+ MI.getOpcode() == SystemZ::ADJCALLSTACKUP ||
// These have a size that may be zero:
MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
MI.getOpcode() == SystemZ::PATCHPOINT) &&
>From 0e652a6010248a59d19ba4bed823b98a3f54b4ec Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 12:38:30 -0600
Subject: [PATCH 3/6] Remove in PostRA pseudos instead
---
llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 4 ----
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 5 +++++
llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 2 --
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3991fce65f03e5..5696ae117d69f0 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -199,10 +199,6 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
switch (MI->getOpcode()) {
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- return;
-
case SystemZ::Return:
LoweredMI = MCInstBuilder(SystemZ::BR)
.addReg(SystemZ::R14D);
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 950548abcfa92c..c3e563d0b5ecd2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1513,6 +1513,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLoadStackGuard(&MI);
return true;
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ MI.eraseFromParent();
+ return true;
+
default:
return false;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9cc7e7ac8b76a4..632218cc61eefe 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -218,8 +218,6 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI,
// These do not have a size:
MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER ||
- MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
- MI.getOpcode() == SystemZ::ADJCALLSTACKUP ||
// These have a size that may be zero:
MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
MI.getOpcode() == SystemZ::PATCHPOINT) &&
>From 882bbf4a95adf2e9bf0111d245f0bf2fc1200e65 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 12 Jan 2024 14:33:46 -0600
Subject: [PATCH 4/6] Just set to 0
---
llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 ++++++++++++++++
llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3 +++
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 8 +++++---
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 7 +------
4 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 4897b37d8eb1ef..80c994a32ea96a 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,6 +66,22 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
return std::make_unique<SystemZELFFrameLowering>();
}
+MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ assert(hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+ return MBB.erase(MI);
+ break;
+
+ default:
+ llvm_unreachable("Unexpected call frame instruction");
+ }
+}
+
namespace {
struct SZFrameSortingObj {
bool IsValid = false; // True if we care about this Object.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 03ce8882c4de5d..95f30e3c0d99c8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,6 +41,9 @@ class SystemZFrameLowering : public TargetFrameLowering {
}
bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
};
class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 071dc7aa21fa22..af59a55b091ee3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8202,7 +8202,7 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
// remove these nodes. Given that these nodes start out as a glued sequence
- // it seems best to remove them here after instruction selection and
+ // it seems best to handle them here after instruction selection and
// scheduling.
MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
uint32_t NumBytes = MI.getOperand(0).getImm();
@@ -8210,8 +8210,10 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
MFI.setMaxCallFrameSize(NumBytes);
MFI.setAdjustsStack(true);
- // TODO: MI should be erased. For now, keep it around as it seems to help
- // scheduling around calls slightly in general (fix MachineScheduler).
+ // Set the NumBytes value to 0 to avoid problems of maintaining the call
+ // frame size across CFG edges. TODO: MI could be erased, but it seems to
+ // help scheduling around calls slightly (fix MachineScheduler + handle the
+ // adjustsStack implication).
MI.getOperand(0).setImm(0);
return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c3e563d0b5ecd2..2a6dce863c28f1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
void SystemZInstrInfo::anchor() {}
SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
- : SystemZGenInstrInfo(-1, -1),
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
STI(sti) {}
@@ -1513,11 +1513,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLoadStackGuard(&MI);
return true;
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- MI.eraseFromParent();
- return true;
-
default:
return false;
}
>From 61c18df09b822b53aa3cf7b8ce022bf7d2a4ca89 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Mon, 15 Jan 2024 13:29:28 -0600
Subject: [PATCH 5/6] Remove mapping, and remove MIs during PEI.
---
.../Target/SystemZ/SystemZFrameLowering.cpp | 30 +++++++++----------
.../lib/Target/SystemZ/SystemZFrameLowering.h | 3 --
.../Target/SystemZ/SystemZISelLowering.cpp | 12 ++++----
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 +-
llvm/test/CodeGen/SystemZ/call-zos-vararg.ll | 5 +---
llvm/test/CodeGen/SystemZ/cond-move-04.mir | 2 ++
llvm/test/CodeGen/SystemZ/cond-move-08.mir | 2 ++
.../SystemZ/cond-move-regalloc-hints-02.mir | 2 ++
.../SystemZ/cond-move-regalloc-hints.mir | 6 ++++
llvm/test/CodeGen/SystemZ/frame-28.mir | 2 ++
llvm/test/CodeGen/SystemZ/swifterror.ll | 8 ++---
.../vector-constrained-fp-intrinsics.ll | 16 +++++-----
12 files changed, 49 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 80c994a32ea96a..dc7e6589b48af2 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -66,22 +66,6 @@ SystemZFrameLowering::create(const SystemZSubtarget &STI) {
return std::make_unique<SystemZELFFrameLowering>();
}
-MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- switch (MI->getOpcode()) {
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- assert(hasReservedCallFrame(MF) &&
- "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- return MBB.erase(MI);
- break;
-
- default:
- llvm_unreachable("Unexpected call frame instruction");
- }
-}
-
namespace {
struct SZFrameSortingObj {
bool IsValid = false; // True if we care about this Object.
@@ -439,6 +423,16 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+static void removeCallSeqPseudos(MachineFunction &MF) {
+ // TODO: These could have been removed in finalize isel already as they are
+ // not mapped as frame instructions. See comment in emitAdjCallStack().
+ for (auto &MBB : MF)
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ if (MI.getOpcode() == SystemZ::ADJCALLSTACKDOWN ||
+ MI.getOpcode() == SystemZ::ADJCALLSTACKUP)
+ MI.eraseFromParent();
+}
+
void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
@@ -480,6 +474,8 @@ void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
ZFI->getRestoreGPRRegs().LowGPR != SystemZ::R6D)
for (auto &MO : MRI->use_nodbg_operands(SystemZ::R6D))
MO.setIsKill(false);
+
+ removeCallSeqPseudos(MF);
}
// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
@@ -1471,6 +1467,8 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
// with existing compilers.
MFFrame.setMaxCallFrameSize(
std::max(64U, (unsigned)alignTo(MFFrame.getMaxCallFrameSize(), 64)));
+
+ removeCallSeqPseudos(MF);
}
// Determines the size of the frame, and creates the deferred spill objects.
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 95f30e3c0d99c8..03ce8882c4de5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -41,9 +41,6 @@ class SystemZFrameLowering : public TargetFrameLowering {
}
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- MachineBasicBlock::iterator
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override;
};
class SystemZELFFrameLowering : public SystemZFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index af59a55b091ee3..2e8fab8dbec742 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8200,20 +8200,22 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
MachineBasicBlock *
SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
MachineBasicBlock *BB) const {
+ MachineFunction &MF = *BB->getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
+ assert(TFL->hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
// Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
// remove these nodes. Given that these nodes start out as a glued sequence
// it seems best to handle them here after instruction selection and
// scheduling.
- MachineFrameInfo &MFI = BB->getParent()->getFrameInfo();
uint32_t NumBytes = MI.getOperand(0).getImm();
if (NumBytes > MFI.getMaxCallFrameSize())
MFI.setMaxCallFrameSize(NumBytes);
MFI.setAdjustsStack(true);
- // Set the NumBytes value to 0 to avoid problems of maintaining the call
- // frame size across CFG edges. TODO: MI could be erased, but it seems to
- // help scheduling around calls slightly (fix MachineScheduler + handle the
- // adjustsStack implication).
+ // TODO: MI should be erased. For now, keep it around as it seems to help
+ // eliminate COPYs around calls slightly in general (fix MachineScheduler?).
MI.getOperand(0).setImm(0);
return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2a6dce863c28f1..950548abcfa92c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -59,7 +59,7 @@ static uint64_t allOnes(unsigned int Count) {
void SystemZInstrInfo::anchor() {}
SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
- : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+ : SystemZGenInstrInfo(-1, -1),
RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister()),
STI(sti) {}
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
index 4637ff2a1b65b5..8290dbfe23104f 100644
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -88,16 +88,13 @@ entry:
ret i64 %retval
}
-; TODO: Unfortunately the lgdr is scheduled below the COPY from $r1d, causing
-; an overlap and thus an extra copy.
; CHECK-LABEL: call_vararg_both0:
; CHECK: stmg 6, 7, 1872(4)
; CHECK-NEXT: aghi 4, -192
; CHECK-NEXT: lg 6, 40(5)
; CHECK-NEXT: lg 5, 32(5)
-; CHECK-NEXT: lgdr 0, 0
; CHECK-NEXT: lgr 2, 1
-; CHECK-NEXT: lgr 1, 0
+; CHECK-NEXT: lgdr 1, 0
; CHECK-NEXT: basr 7, 6
; CHECK-NEXT: bcr 0, 0
; CHECK-NEXT: lg 7, 2072(4)
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-04.mir b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
index ab4a14cfaee87b..23fd2739698a40 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-04.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-04.mir
@@ -65,10 +65,12 @@ body: |
CHIMux %3, 0, implicit-def $cc
%0 = LOCRMux undef %0, %5, 14, 6, implicit $cc
%0 = LOCRMux %0, %2, 14, 6, implicit killed $cc
+ ADJCALLSTACKDOWN 0, 0
%7 = LGFR %0
$r3d = LGHI 0
$r4d = COPY %7
CallBRASL @foo, undef $r2d, killed $r3d, killed $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d
+ ADJCALLSTACKUP 0, 0
J %bb.1
...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
index 2ea67dcce067bc..64c6d069799282 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir
@@ -155,7 +155,9 @@ body: |
J %bb.4
bb.4.bb33:
+ ADJCALLSTACKDOWN 0, 0
CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
+ ADJCALLSTACKUP 0, 0
STRL %4, @globvar :: (store (s32) into @globvar)
CLFIMux undef %23:grx32bit, 1, implicit-def $cc
%25:grx32bit = LHIMux 0
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
index 8a7929c9eb2c31..2701a1dc034a22 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir
@@ -45,9 +45,11 @@ body: |
%11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc
CHIMux %6, 2, implicit-def $cc
%0:gr32bit = SELRMux %11, %5, 14, 8, implicit killed $cc
+ ADJCALLSTACKDOWN 0, 0
%10:gr64bit = LGFR %0
$r2d = COPY %10
CallBRASL @foo, killed $r2d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc
+ ADJCALLSTACKUP 0, 0
J %bb.1
...
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
index 009fd6ce82679b..c98ffda8372721 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
+++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir
@@ -200,12 +200,18 @@ body: |
%32:gr64bit = COPY $r3d
%0:gr64bit = COPY $r2d
+ ADJCALLSTACKDOWN 0, 0
CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
%1:addr64bit = COPY $r2d
+ ADJCALLSTACKUP 0, 0
+ ADJCALLSTACKDOWN 0, 0
CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
%2:addr64bit = COPY $r2d
+ ADJCALLSTACKUP 0, 0
%3:gr32bit = AHIMuxK %0.subreg_l32, -1, implicit-def dead $cc
+ ADJCALLSTACKDOWN 0, 0
CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
+ ADJCALLSTACKUP 0, 0
%55:gr32bit = AHIMuxK %0.subreg_l32, 3, implicit-def dead $cc
%56:addr64bit = LGHI 0
%57:gr64bit = COPY %0
diff --git a/llvm/test/CodeGen/SystemZ/frame-28.mir b/llvm/test/CodeGen/SystemZ/frame-28.mir
index 254b8a2cf2461b..13337dba6ec53f 100644
--- a/llvm/test/CodeGen/SystemZ/frame-28.mir
+++ b/llvm/test/CodeGen/SystemZ/frame-28.mir
@@ -179,7 +179,9 @@ body: |
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.0, 0, $noreg
VST64 renamable $f16d, %stack.1, 0, $noreg
+ ADJCALLSTACKDOWN 0, 0
CallBRASL @foo, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+ ADJCALLSTACKUP 0, 0
$f17d = IMPLICIT_DEF
VST64 renamable $f17d, %stack.1, 0, $noreg
Return
diff --git a/llvm/test/CodeGen/SystemZ/swifterror.ll b/llvm/test/CodeGen/SystemZ/swifterror.ll
index 1b18287cac1468..3ea29f1d830ec6 100644
--- a/llvm/test/CodeGen/SystemZ/swifterror.ll
+++ b/llvm/test/CodeGen/SystemZ/swifterror.ll
@@ -30,8 +30,8 @@ entry:
define float @caller(ptr %error_ref) {
; CHECK-LABEL: caller:
; Make a copy of error_ref because r2 is getting clobbered
-; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
-; CHECK-DAG: lghi %r9, 0
+; CHECK: lgr %r[[REG1:[0-9]+]], %r2
+; CHECK: lghi %r9, 0
; CHECK: brasl %r14, foo
; CHECK: %r2, %r9
; CHECK: jlh
@@ -197,7 +197,7 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
; CHECK-LABEL: foo_sret:
; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3
-; CHECK-DAG: lghi %r2, 16
+; CHECK: lghi %r2, 16
; CHECK: brasl %r14, malloc
; CHECK: mvi 8(%r2), 1
; CHECK: st %r[[REG2]], 4(%r[[REG1]])
@@ -280,7 +280,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2
; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3
; The first swifterror value:
-; CHECK-DAG: lghi %r9, 0
+; CHECK: lghi %r9, 0
; CHECK: brasl %r14, foo
; CHECK: ltgr %r2, %r9
; CHECK: jlh
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index 9d77744f18ca1a..69e1c2f4aa0af3 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -1649,8 +1649,8 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI36_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f8, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: ldr %f2, %f8
@@ -1707,14 +1707,14 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: larl %r1, .LCPI37_1
; S390X-NEXT: le %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ler %f8, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: larl %r1, .LCPI37_2
; S390X-NEXT: le %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ler %f9, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: brasl %r14, __powisf2@PLT
; S390X-NEXT: ler %f2, %f9
@@ -1784,14 +1784,14 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI38_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f8, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI38_2
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f9, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: std %f0, 16(%r13)
@@ -1865,20 +1865,20 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f8, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_2
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f9, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: larl %r1, .LCPI39_3
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f10, %f0
+; S390X-NEXT: lghi %r2, 3
; S390X-NEXT: ldr %f0, %f1
; S390X-NEXT: brasl %r14, __powidf2@PLT
; S390X-NEXT: ldr %f2, %f10
>From f51cfd7b28b7070bf308d3d568a3cbbff1aea6c8 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 27 Mar 2024 13:33:15 +0100
Subject: [PATCH 6/6] Commenting.
---
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 12 ++++++------
llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 5 ++---
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2e8fab8dbec742..196903fa4d3202 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8205,17 +8205,17 @@ SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
assert(TFL->hasReservedCallFrame(MF) &&
"ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- // Do the work of MachineFrameInfo::computeMaxCallFrameSize() early and
- // remove these nodes. Given that these nodes start out as a glued sequence
- // it seems best to handle them here after instruction selection and
- // scheduling.
+ // Get the MaxCallFrameSize value and clear the NumBytes value to not
+ // confuse the verifier. Keep the pseudos around as scheduling barriers
+ // around call arguments even though they serve no further purpose as the
+ // call frame is statically reserved in the prolog.
uint32_t NumBytes = MI.getOperand(0).getImm();
if (NumBytes > MFI.getMaxCallFrameSize())
MFI.setMaxCallFrameSize(NumBytes);
+ // Set AdjustsStack as this is *not* mapped as a frame instruction.
MFI.setAdjustsStack(true);
- // TODO: MI should be erased. For now, keep it around as it seems to help
- // eliminate COPYs around calls slightly in general (fix MachineScheduler?).
+ // TODO: Fix machine scheduler and erase MI instead?
MI.getOperand(0).setImm(0);
return BB;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 04a9467ec9a5f9..7f3a143aad9709 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -13,9 +13,8 @@ def IsTargetELF : Predicate<"Subtarget->isTargetELF()">;
// Stack allocation
//===----------------------------------------------------------------------===//
-// These pseudos are removed after instruction selection while updating the
-// values of MaxcallFrameSize and AdjustsStack which are needed during frame
-// lowering. The callseq_start node requires the hasSideEffects flag.
+// These pseudos carry values needed to compute the MaxCallFrameSize of the
+// function. The callseq_start node requires the hasSideEffects flag.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
[(callseq_start timm:$amt1, timm:$amt2)]>;
More information about the llvm-commits
mailing list