[llvm] r331044 - [AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true
Francis Visoiu Mistrih via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 27 08:30:54 PDT 2018
Author: thegameg
Date: Fri Apr 27 08:30:54 2018
New Revision: 331044
URL: http://llvm.org/viewvc/llvm-project?rev=331044&view=rev
Log:
[AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true
Put the first ldp at the end, so that the load-store optimizer can run
and merge the ldp and the add into a post-index ldp.
This didn't work in the case where no frame was needed, and it resulted in
code size regressions.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/trunk/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=331044&r1=331043&r2=331044&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Fri Apr 27 08:30:54 2018
@@ -514,6 +514,38 @@ static void fixupCalleeSaveRestoreStackO
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}
+static void adaptForLdStOpt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator FirstSPPopI,
+ MachineBasicBlock::iterator LastPopI) {
+ // Sometimes (when we restore in the same order as we save), we can end up
+ // with code like this:
+ //
+ // ldp x26, x25, [sp]
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // add sp, sp, #64
+ //
+ // In this case, it is always better to put the first ldp at the end, so
+ // that the load-store optimizer can run and merge the ldp and the add into
+ // a post-index ldp.
+ // If we managed to grab the first pop instruction, move it to the end.
+ if (ReverseCSRRestoreSeq)
+ MBB.splice(FirstSPPopI, &MBB, LastPopI);
+ // We should end up with something like this now:
+ //
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // ldp x26, x25, [sp]
+ // add sp, sp, #64
+ //
+ // and the load-store optimizer can merge the last two instructions into:
+ //
+ // ldp x26, x25, [sp], #64
+ //
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -930,12 +962,20 @@ void AArch64FrameLowering::emitEpilogue(
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
- if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
+ bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
+
+ // If we're done after this, make sure to help the load store optimizer.
+ if (Done)
+ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
+
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+ if (Done)
return;
+
NumBytes = 0;
}
@@ -967,33 +1007,8 @@ void AArch64FrameLowering::emitEpilogue(
FirstSPPopI = Prev;
}
- // Sometimes (when we restore in the same order as we save), we can end up
- // with code like this:
- //
- // ldp x26, x25, [sp]
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // add sp, sp, #64
- //
- // In this case, it is always better to put the first ldp at the end, so
- // that the load-store optimizer can run and merge the ldp and the add into
- // a post-index ldp.
- // If we managed to grab the first pop instruction, move it to the end.
- if (LastPopI != Begin)
- MBB.splice(FirstSPPopI, &MBB, LastPopI);
- // We should end up with something like this now:
- //
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // ldp x26, x25, [sp]
- // add sp, sp, #64
- //
- // and the load-store optimizer can merge the last two instructions into:
- //
- // ldp x26, x25, [sp], #64
- //
+ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
+
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
}
Modified: llvm/trunk/test/CodeGen/AArch64/reverse-csr-restore-seq.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/reverse-csr-restore-seq.mir?rev=331044&r1=331043&r2=331044&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/reverse-csr-restore-seq.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/reverse-csr-restore-seq.mir Fri Apr 27 08:30:54 2018
@@ -7,6 +7,8 @@
define void @bar() nounwind { entry: unreachable }
+ define void @baz() nounwind { entry: unreachable }
+
...
---
name: foo
@@ -71,3 +73,34 @@ body: |
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0
RET_ReallyLR
...
+---
+# Check that the load from the offset 0 is moved at the end even when hasFP is
+# false.
+name: baz
+# CHECK-LABEL: name: baz
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+ hasCalls: true
+body: |
+ bb.0:
+ successors: %bb.1
+
+ $x0 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ B %bb.1
+
+ bb.1:
+ ; CHECK: $x20, $lr = frame-destroy LDPXi $sp, 2
+ ; BEFORELDSTOPT-NEXT: $x21 = frame-destroy LDRXui $sp, 0
+ ; BEFORELDSTOPT-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+
+ ; AFTERLDSTOPT-NEXT: early-clobber $sp, $x21 = frame-destroy LDRXpost $sp, 32
+ RET_ReallyLR
+...
More information about the llvm-commits
mailing list