[llvm] 7af7064 - Revert "[AArch64] Remove unused ReverseCSRRestoreSeq option. (#82326)"
Caroline Concatto via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 20 10:14:13 PST 2024
Author: Caroline Concatto
Date: 2024-02-20T18:13:33Z
New Revision: 7af70643ca4220c254bdb1e9ea51762228642a10
URL: https://github.com/llvm/llvm-project/commit/7af70643ca4220c254bdb1e9ea51762228642a10
DIFF: https://github.com/llvm/llvm-project/commit/7af70643ca4220c254bdb1e9ea51762228642a10.diff
LOG: Revert "[AArch64] Remove unused ReverseCSRRestoreSeq option. (#82326)"
Patch 3f0404aae7ed2 is breaking some debug builds, so we cannot use the reverse here.
This reverts commit 493f10106f7f1799eb67be95058b251e6a3bf0af.
Added:
llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c013bbe9926fef..0e9adde564b3e5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -239,6 +239,11 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
cl::desc("enable use of redzone on AArch64"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ReverseCSRRestoreSeq("reverse-csr-restore-seq",
+ cl::desc("reverse the CSR restore sequence"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> StackTaggingMergeSetTag(
"stack-tagging-merge-settag",
cl::desc("merge settag instruction in function epilog"), cl::init(true),
@@ -302,6 +307,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;
if (!EnableHomogeneousPrologEpilog)
return false;
+ if (ReverseCSRRestoreSeq)
+ return false;
if (EnableRedZone)
return false;
@@ -3104,27 +3111,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
- if (homogeneousPrologEpilog(MF, &MBB)) {
- auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
- .setMIFlag(MachineInstr::FrameDestroy);
- for (auto &RPI : RegPairs) {
- MIB.addReg(RPI.Reg1, RegState::Define);
- MIB.addReg(RPI.Reg2, RegState::Define);
- }
- return true;
- }
-
- // For performance reasons restore SVE register in increasing order
- auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
- auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
- auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
- std::reverse(PPRBegin, PPREnd.base());
- auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
- auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
- auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
- std::reverse(ZPRBegin, ZPREnd.base());
-
- for (const RegPairInfo &RPI : RegPairs) {
+ auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
@@ -3198,6 +3185,43 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+
+ return MIB->getIterator();
+ };
+
+ if (homogeneousPrologEpilog(MF, &MBB)) {
+ auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ for (auto &RPI : RegPairs) {
+ MIB.addReg(RPI.Reg1, RegState::Define);
+ MIB.addReg(RPI.Reg2, RegState::Define);
+ }
+ return true;
+ }
+
+ // For performance reasons restore SVE register in increasing order
+ auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
+ auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
+ auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR);
+ std::reverse(PPRBegin, PPREnd.base());
+ auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
+ auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
+ auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR);
+ std::reverse(ZPRBegin, ZPREnd.base());
+
+ if (ReverseCSRRestoreSeq) {
+ MachineBasicBlock::iterator First = MBB.end();
+ for (const RegPairInfo &RPI : reverse(RegPairs)) {
+ MachineBasicBlock::iterator It = EmitMI(RPI);
+ if (First == MBB.end())
+ First = It;
+ }
+ if (First != MBB.end())
+ MBB.splice(MBBI, &MBB, First);
+ } else {
+ for (const RegPairInfo &RPI : RegPairs) {
+ (void)EmitMI(RPI);
+ }
}
return true;
diff --git a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
new file mode 100644
index 00000000000000..de4baec50e0c66
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
@@ -0,0 +1,101 @@
+# RUN: llc -run-pass=prologepilog -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
+# RUN: llc -start-before=prologepilog -stop-after=aarch64-ldst-opt -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK
+#
+--- |
+
+ define void @foo() nounwind { entry: unreachable }
+
+ define void @bar() nounwind { entry: unreachable }
+
+ define void @baz() nounwind { entry: unreachable }
+
+...
+---
+name: foo
+# CHECK-LABEL: name: foo
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $x19 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+ $x22 = IMPLICIT_DEF
+ $x23 = IMPLICIT_DEF
+ $x24 = IMPLICIT_DEF
+ $x25 = IMPLICIT_DEF
+ $x26 = IMPLICIT_DEF
+
+ ; The local stack size is 0, so the last ldp in the sequence will also
+ ; restore the stack.
+ ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 2
+ ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 4
+ ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 6
+
+ ; The ldp and the stack increment get merged even before
+ ; the load-store optimizer.
+ ; CHECK-NEXT: early-clobber $sp, $x26, $x25 = frame-destroy LDPXpost $sp, 8
+
+ RET_ReallyLR
+...
+---
+name: bar
+# CHECK-LABEL: name: bar
+tracksRegLiveness: true
+stack:
+ - { id : 0, size: 8, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+
+body: |
+ bb.0:
+ $x19 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+ $x22 = IMPLICIT_DEF
+ $x23 = IMPLICIT_DEF
+ $x24 = IMPLICIT_DEF
+ $x25 = IMPLICIT_DEF
+ $x26 = IMPLICIT_DEF
+
+ ; The local stack size is not 0, and we can combine the CSR stack size with
+ ; the local stack size. This results in rewriting the offsets for all the
+ ; save/restores and prevents us from merging the stack adjustment and the last pop.
+ ; In this case, there is no point in moving the first CSR pair to the end.
+ ; We do it anyway, as it's a small price to pay for the resulting
+ ; simplification in the epilogue emission code.
+ ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 4
+ ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 6
+ ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 8
+ ; CHECK-NEXT: $x26, $x25 = frame-destroy LDPXi $sp, 2
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0
+ RET_ReallyLR
+...
+---
+# Check that the load from offset 0 is moved to the end even when hasFP is
+# false.
+name: baz
+# CHECK-LABEL: name: baz
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+ hasCalls: true
+body: |
+ bb.0:
+ successors: %bb.1
+
+ $x0 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.1
+
+ bb.1:
+ ; CHECK: $x21, $x20 = frame-destroy LDPXi $sp, 2
+ ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 32
+ RET_ReallyLR
+...
More information about the llvm-commits
mailing list