[llvm] r291180 - [AArch64] Fold some filled/spilled subreg COPYs
Geoff Berry via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 5 13:51:43 PST 2017
Author: gberry
Date: Thu Jan 5 15:51:42 2017
New Revision: 291180
URL: http://llvm.org/viewvc/llvm-project?rev=291180&view=rev
Log:
[AArch64] Fold some filled/spilled subreg COPYs
Summary:
Extend AArch64 foldMemoryOperandImpl() to handle folding spills of
subreg COPYs with read-undef defs like:
%vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64:%vreg0
by widening the spilled physical source reg and generating:
STRXui %XZR <fi#0>
as well as folding fills of similar COPYs like:
%vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
by generating:
%vreg0:sub_32<def,read-undef> = LDRWui <fi#0>
Reviewers: MatzeB, qcolombet
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D27425
Added:
llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=291180&r1=291179&r2=291180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jan 5 15:51:42 2017
@@ -2583,7 +2583,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
//
// <rdar://problem/11522048>
//
- if (MI.isCopy()) {
+ if (MI.isFullCopy()) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@@ -2598,7 +2598,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
}
}
- // Handle the case where a copy is being spilled or refilled but the source
+ // Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2613,7 @@ MachineInstr *AArch64InstrInfo::foldMemo
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
- // will be refilled as
+ // will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@@ -2622,9 +2622,11 @@ MachineInstr *AArch64InstrInfo::foldMemo
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
- if (MI.isFullCopy() && Ops.size() == 1 &&
+ if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
+ bool IsSpill = Ops[0] == 0;
+ bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2634,112 @@ MachineInstr *AArch64InstrInfo::foldMemo
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
+ // This is slightly expensive to compute for physical regs since
+ // getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
- const TargetRegisterClass &DstRC = *getRegClass(DstReg);
- const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
- if (DstRC.getSize() == SrcRC.getSize()) {
- if (Ops[0] == 0)
+
+ if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+ assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ "Mismatched register size in non subreg COPY");
+ if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
- &SrcRC, &TRI);
+ getRegClass(SrcReg), &TRI);
else
- loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
+
+ // Handle cases like spilling def of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ //
+ // where the physical register source can be widened and stored to the full
+ // virtual reg destination stack slot, in this case producing:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ if (IsSpill && DstMO.isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ assert(SrcMO.getSubReg() == 0 &&
+ "Unexpected subreg on physical register");
+ const TargetRegisterClass *SpillRC;
+ unsigned SpillSubreg;
+ switch (DstMO.getSubReg()) {
+ default:
+ SpillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ case AArch64::ssub:
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::GPR64RegClass;
+ SpillSubreg = AArch64::sub_32;
+ } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR64RegClass;
+ SpillSubreg = AArch64::ssub;
+ } else
+ SpillRC = nullptr;
+ break;
+ case AArch64::dsub:
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR128RegClass;
+ SpillSubreg = AArch64::dsub;
+ } else
+ SpillRC = nullptr;
+ break;
+ }
+
+ if (SpillRC)
+ if (unsigned WidenedSrcReg =
+ TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+ storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+ FrameIndex, SpillRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
+ // Handle cases like filling use of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ //
+ // where we can load the full virtual reg source stack slot, into the subreg
+ // destination, in this case producing:
+ //
+ // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ //
+ if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+ const TargetRegisterClass *FillRC;
+ switch (DstMO.getSubReg()) {
+ default:
+ FillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ FillRC = &AArch64::GPR32RegClass;
+ break;
+ case AArch64::ssub:
+ FillRC = &AArch64::FPR32RegClass;
+ break;
+ case AArch64::dsub:
+ FillRC = &AArch64::FPR64RegClass;
+ break;
+ }
+
+ if (FillRC) {
+ assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ "Mismatched regclass size on folded subreg COPY");
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
+ MachineInstr &LoadMI = *--InsertPt;
+ MachineOperand &LoadDst = LoadMI.getOperand(0);
+ assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+ LoadDst.setSubReg(DstMO.getSubReg());
+ LoadDst.setIsUndef();
+ return &LoadMI;
+ }
+ }
}
// Cannot fold.
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h?rev=291180&r1=291179&r2=291180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.h Thu Jan 5 15:51:42 2017
@@ -162,6 +162,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // This tells target independent code that it is okay to pass instructions
+ // with subreg operands to foldMemoryOperandImpl.
+ bool isSubregFoldable() const override { return true; }
+
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
Added: llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir?rev=291180&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/AArch64/spill-fold.mir Thu Jan 5 15:51:42 2017
@@ -0,0 +1,82 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+ define i64 @test_subreg_spill_fold() { ret i64 0 }
+ define i64 @test_subreg_spill_fold2() { ret i64 0 }
+ define i64 @test_subreg_spill_fold3() { ret i64 0 }
+ define i64 @test_subreg_fill_fold() { ret i64 0 }
+ define double @test_subreg_fill_fold2() { ret double 0.0 }
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold
+# Ensure that the spilled subreg COPY is eliminated and folded into the spill store.
+name: test_subreg_spill_fold
+registers:
+ - { id: 0, class: gpr64 }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.sub_32 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ %x0 = COPY %0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold2
+# Similar to test_subreg_spill_fold, but with a %vreg0 register class not containing %WZR.
+name: test_subreg_spill_fold2
+registers:
+ - { id: 0, class: gpr64sp }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.sub_32 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ %x0 = ADDXri %0, 1, 0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_spill_fold3
+# Similar to test_subreg_spill_fold, but with a cross-register-class copy.
+name: test_subreg_spill_fold3
+registers:
+ - { id: 0, class: fpr64 }
+body: |
+ bb.0:
+ ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0)
+ undef %0.ssub = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %d0, 12, implicit-def dead %d1, 12, implicit-def dead %d2, 12, implicit-def dead %d3, 12, implicit-def dead %d4, 12, implicit-def dead %d5, 12, implicit-def dead %d6, 12, implicit-def dead %d7, 12, implicit-def dead %d8, 12, implicit-def dead %d9, 12, implicit-def dead %d10, 12, implicit-def dead %d11, 12, implicit-def dead %d12, 12, implicit-def dead %d13, 12, implicit-def dead %d14, 12, implicit-def dead %d15, 12, implicit-def dead %d16, 12, implicit-def dead %d17, 12, implicit-def dead %d18, 12, implicit-def dead %d19, 12, implicit-def dead %d20, 12, implicit-def dead %d21, 12, implicit-def dead %d22, 12, implicit-def dead %d23, 12, implicit-def dead %d24, 12, implicit-def dead %d25, 12, implicit-def dead %d26, 12, implicit-def dead %d27, 12, implicit-def dead %d28, 12, implicit-def dead %d29, 12, implicit-def dead %d30, 12, implicit-def %d31
+ %x0 = COPY %0
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold
+# Ensure that the filled COPY is eliminated and folded into the fill load.
+name: test_subreg_fill_fold
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr64 }
+body: |
+ bb.0:
+ %0 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ ; CHECK: undef %1.sub_32 = LDRWui %stack.0, 0 :: (load 4 from %stack.0)
+ undef %1.sub_32 = COPY %0
+ %x0 = COPY %1
+ RET_ReallyLR implicit %x0
+...
+---
+# CHECK-LABEL: name: test_subreg_fill_fold2
+# Similar to test_subreg_fill_fold, but with a cross-class copy.
+name: test_subreg_fill_fold2
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: fpr64 }
+body: |
+ bb.0:
+ %0 = COPY %wzr
+ INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp
+ ; CHECK: undef %1.ssub = LDRSui %stack.0, 0 :: (load 4 from %stack.0)
+ undef %1.ssub = COPY %0
+ %d0 = COPY %1
+ RET_ReallyLR implicit %d0
+...
More information about the llvm-commits
mailing list