[llvm] 8a397b6 - [AArch64][SVE] Add support for spilling/filling ZPR2/3/4
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Thu May 28 03:04:53 PDT 2020
Author: Cullen Rhodes
Date: 2020-05-28T10:02:57Z
New Revision: 8a397b66b2c672999e9e6d63334d5bffd7db1a3f
URL: https://github.com/llvm/llvm-project/commit/8a397b66b2c672999e9e6d63334d5bffd7db1a3f
DIFF: https://github.com/llvm/llvm-project/commit/8a397b66b2c672999e9e6d63334d5bffd7db1a3f.diff
LOG: [AArch64][SVE] Add support for spilling/filling ZPR2/3/4
Summary:
This patch enables the register allocator to spill/fill lists of 2, 3
and 4 SVE vector registers to/from the stack. This is implemented with
pseudo instructions that get expanded to individual LDR_ZXI/STR_ZXI
instructions in AArch64ExpandPseudoInsts.
Patch by Sander de Smalen.
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D75988
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/spillfill-sve.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 381bf86c7d62..b9034862c270 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -80,6 +80,9 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
bool expandSetTagLoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned Opc,
+ unsigned N);
};
} // end anonymous namespace
@@ -595,6 +598,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
return true;
}
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Opc, unsigned N) {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineInstr &MI = *MBBI;
+ for (unsigned Offset = 0; Offset < N; ++Offset) {
+ int ImmOffset = MI.getOperand(2).getImm() + Offset;
+ bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
+ assert(ImmOffset >= -256 && ImmOffset < 256 &&
+ "Immediate spill offset out of range");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+ .addReg(
+ TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
+ Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+ .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+ .addImm(ImmOffset);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -970,6 +995,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
report_fatal_error(
"Non-writeback variants of STGloop / STZGloop should not "
"survive past PrologEpilogInserter.");
+ case AArch64::STR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
+ case AArch64::STR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
+ case AArch64::STR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+ case AArch64::LDR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
+ case AArch64::LDR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
+ case AArch64::LDR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
}
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 564fd33ca596..fd07c32e5496 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2278,6 +2278,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::STR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 4;
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case AArch64::STR_ZZZXI:
+ case AArch64::LDR_ZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 3;
+ MinOffset = -256;
+ MaxOffset = 253;
+ break;
+ case AArch64::STR_ZZXI:
+ case AArch64::LDR_ZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 2;
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::Scalable(2);
@@ -2984,6 +3005,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2992,6 +3014,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3031,6 +3058,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3049,6 +3080,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3056,6 +3091,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3063,19 +3102,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -3126,6 +3159,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3134,6 +3168,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3173,6 +3212,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3191,6 +3234,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3198,6 +3245,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3205,20 +3256,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index df82680b1f6d..54a764337324 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1334,6 +1334,20 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+ // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
+ // These get expanded to individual LDR_ZXI/STR_ZXI instructions in
+ // AArch64ExpandPseudoInsts.
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ }
+ let mayStore = 1, hasSideEffects = 0 in {
+ def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ }
+
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 21bdb45965bd..982d232f12f4 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -8,6 +8,9 @@
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
attributes #0 = { nounwind "target-features"="+sve" }
@@ -90,3 +93,120 @@ body: |
$z0 = COPY %0
RET_ReallyLR
...
+---
+name: spills_fills_stack_id_zpr2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: zpr2 }
+stack:
+liveins:
+ - { reg: '$z0_z1', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $z0_z1
+
+ ; CHECK-LABEL: name: spills_fills_stack_id_zpr2
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
+ ; CHECK-NEXT: stack-id: sve-vec
+
+ ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
+ ; EXPAND: STR_ZXI $z0, $sp, 0
+ ; EXPAND: STR_ZXI $z1, $sp, 1
+ ; EXPAND: $z0 = LDR_ZXI $sp, 0
+ ; EXPAND: $z1 = LDR_ZXI $sp, 1
+
+ %0:zpr2 = COPY $z0_z1
+
+ $z0_z1_z2_z3 = IMPLICIT_DEF
+ $z4_z5_z6_z7 = IMPLICIT_DEF
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+
+ $z0_z1 = COPY %0
+ RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr3
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: zpr3 }
+stack:
+liveins:
+ - { reg: '$z0_z1_z2', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $z0_z1_z2
+
+ ; CHECK-LABEL: name: spills_fills_stack_id_zpr3
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
+ ; CHECK-NEXT: stack-id: sve-vec
+
+ ; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
+ ; EXPAND: STR_ZXI $z0, $sp, 0
+ ; EXPAND: STR_ZXI $z1, $sp, 1
+ ; EXPAND: STR_ZXI $z2, $sp, 2
+ ; EXPAND: $z0 = LDR_ZXI $sp, 0
+ ; EXPAND: $z1 = LDR_ZXI $sp, 1
+ ; EXPAND: $z2 = LDR_ZXI $sp, 2
+
+ %0:zpr3 = COPY $z0_z1_z2
+
+ $z0_z1_z2_z3 = IMPLICIT_DEF
+ $z4_z5_z6_z7 = IMPLICIT_DEF
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+
+ $z0_z1_z2 = COPY %0
+ RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: zpr4 }
+stack:
+liveins:
+ - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $z0_z1_z2_z3
+
+ ; CHECK-LABEL: name: spills_fills_stack_id_zpr4
+ ; CHECK: stack:
+ ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
+ ; CHECK-NEXT: stack-id: sve-vec
+
+ ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
+ ; EXPAND: STR_ZXI $z0, $sp, 0
+ ; EXPAND: STR_ZXI $z1, $sp, 1
+ ; EXPAND: STR_ZXI $z2, $sp, 2
+ ; EXPAND: STR_ZXI $z3, $sp, 3
+ ; EXPAND: $z0 = LDR_ZXI $sp, 0
+ ; EXPAND: $z1 = LDR_ZXI $sp, 1
+ ; EXPAND: $z2 = LDR_ZXI $sp, 2
+ ; EXPAND: $z3 = LDR_ZXI $sp, 3
+
+ %0:zpr4 = COPY $z0_z1_z2_z3
+
+ $z0_z1_z2_z3 = IMPLICIT_DEF
+ $z4_z5_z6_z7 = IMPLICIT_DEF
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+
+ $z0_z1_z2_z3 = COPY %0
+ RET_ReallyLR
+...
More information about the llvm-commits
mailing list