[llvm] 8a397b6 - [AArch64][SVE] Add support for spilling/filling ZPR2/3/4

Cullen Rhodes via llvm-commits llvm-commits at lists.llvm.org
Thu May 28 03:04:53 PDT 2020


Author: Cullen Rhodes
Date: 2020-05-28T10:02:57Z
New Revision: 8a397b66b2c672999e9e6d63334d5bffd7db1a3f

URL: https://github.com/llvm/llvm-project/commit/8a397b66b2c672999e9e6d63334d5bffd7db1a3f
DIFF: https://github.com/llvm/llvm-project/commit/8a397b66b2c672999e9e6d63334d5bffd7db1a3f.diff

LOG: [AArch64][SVE] Add support for spilling/filling ZPR2/3/4

Summary:
This patch enables the register allocator to spill/fill lists of 2, 3,
and 4 SVE vector registers to/from the stack. This is implemented with
pseudo instructions that are expanded to individual LDR_ZXI/STR_ZXI
instructions in AArch64ExpandPseudoInsts.
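
For example, spilling a ZPR2 tuple produces a single pseudo that the
expansion pass turns into one STR_ZXI per sub-register at consecutive
vector-length-scaled immediates. A rough MIR sketch (the expanded form
matches the EXPAND checks in the updated test; the pseudo line is
illustrative):

  STR_ZZXI $z0_z1, $sp, 0    ; pseudo spill of the 2-vector tuple
  ; after AArch64ExpandPseudoInsts:
  STR_ZXI $z0, $sp, 0
  STR_ZXI $z1, $sp, 1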

Patch by Sander de Smalen.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D75988

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/spillfill-sve.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 381bf86c7d62..b9034862c270 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -80,6 +80,9 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandSetTagLoop(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         MachineBasicBlock::iterator &NextMBBI);
+  bool expandSVESpillFill(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI, unsigned Opc,
+                          unsigned N);
 };
 
 } // end anonymous namespace
@@ -595,6 +598,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator MBBI,
+                                             unsigned Opc, unsigned N) {
+  const TargetRegisterInfo *TRI =
+      MBB.getParent()->getSubtarget().getRegisterInfo();
+  MachineInstr &MI = *MBBI;
+  for (unsigned Offset = 0; Offset < N; ++Offset) {
+    int ImmOffset = MI.getOperand(2).getImm() + Offset;
+    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
+    assert(ImmOffset >= -256 && ImmOffset < 256 &&
+           "Immediate spill offset out of range");
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+        .addReg(
+            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
+            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+        .addImm(ImmOffset);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -970,6 +995,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
      report_fatal_error(
          "Non-writeback variants of STGloop / STZGloop should not "
          "survive past PrologEpilogInserter.");
+   case AArch64::STR_ZZZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
+   case AArch64::STR_ZZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
+   case AArch64::STR_ZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+   case AArch64::LDR_ZZZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
+   case AArch64::LDR_ZZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
+   case AArch64::LDR_ZZXI:
+     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
   }
   return false;
 }

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 564fd33ca596..fd07c32e5496 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2278,6 +2278,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::STR_ZZZZXI:
+  case AArch64::LDR_ZZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 4;
+    MinOffset = -256;
+    MaxOffset = 252;
+    break;
+  case AArch64::STR_ZZZXI:
+  case AArch64::LDR_ZZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 3;
+    MinOffset = -256;
+    MaxOffset = 253;
+    break;
+  case AArch64::STR_ZZXI:
+  case AArch64::LDR_ZZXI:
+    Scale = TypeSize::Scalable(16);
+    Width = SVEMaxBytesPerVector * 2;
+    MinOffset = -256;
+    MaxOffset = 254;
+    break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
     Scale = TypeSize::Scalable(2);
@@ -2984,6 +3005,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
   unsigned Opc = 0;
   bool Offset = true;
+  unsigned StackID = TargetStackID::Default;
   switch (TRI->getSpillSize(*RC)) {
   case 1:
     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2992,6 +3014,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
   case 2:
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::STRHui;
+    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_PXI;
+      StackID = TargetStackID::SVEVector;
+    }
     break;
   case 4:
     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3031,6 +3058,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
                               get(AArch64::STPXi), SrcReg, isKill,
                               AArch64::sube64, AArch64::subo64, FI, MMO);
       return;
+    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 24:
@@ -3049,6 +3080,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Twov2d;
       Offset = false;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 48:
@@ -3056,6 +3091,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Threev2d;
       Offset = false;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 64:
@@ -3063,19 +3102,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
       Opc = AArch64::ST1Fourv2d;
       Offset = false;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+      Opc = AArch64::STR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   }
-  unsigned StackID = TargetStackID::Default;
-  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
-    Opc = AArch64::STR_PXI;
-    StackID = TargetStackID::SVEVector;
-  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
-    Opc = AArch64::STR_ZXI;
-    StackID = TargetStackID::SVEVector;
-  }
   assert(Opc && "Unknown register class");
   MFI.setStackID(FI, StackID);
 
@@ -3126,6 +3159,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
 
   unsigned Opc = 0;
   bool Offset = true;
+  unsigned StackID = TargetStackID::Default;
   switch (TRI->getSpillSize(*RC)) {
   case 1:
     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3134,6 +3168,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
   case 2:
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::LDRHui;
+    else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_PXI;
+      StackID = TargetStackID::SVEVector;
+    }
     break;
   case 4:
     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3173,6 +3212,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
                                AArch64::subo64, FI, MMO);
       return;
+    } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 24:
@@ -3191,6 +3234,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Twov2d;
       Offset = false;
+    } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 48:
@@ -3198,6 +3245,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Threev2d;
       Offset = false;
+    } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   case 64:
@@ -3205,20 +3256,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
       Opc = AArch64::LD1Fourv2d;
       Offset = false;
+    } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+      assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+      Opc = AArch64::LDR_ZZZZXI;
+      StackID = TargetStackID::SVEVector;
     }
     break;
   }
 
-  unsigned StackID = TargetStackID::Default;
-  if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
-    Opc = AArch64::LDR_PXI;
-    StackID = TargetStackID::SVEVector;
-  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-    assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
-    Opc = AArch64::LDR_ZXI;
-    StackID = TargetStackID::SVEVector;
-  }
   assert(Opc && "Unknown register class");
   MFI.setStackID(FI, StackID);
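
A note on the getMemOpInfo ranges above: the expansion adds 0..N-1 to
the pseudo's base immediate, and each resulting LDR_ZXI/STR_ZXI offset
must stay within [-256, 255] (in vector-length units, as asserted in
expandSVESpillFill), so the pseudo's MaxOffset shrinks with the list
length:

  MaxOffset(N) = 255 - (N - 1)
    N = 2  ->  254
    N = 3  ->  253
    N = 4  ->  252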
 

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index df82680b1f6d..54a764337324 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1334,6 +1334,20 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
                   (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
 
+  // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
+  // These get expanded to individual LDR_ZXI/STR_ZXI instructions in
+  // AArch64ExpandPseudoInsts.
+  let mayLoad = 1, hasSideEffects = 0 in {
+    def LDR_ZZXI   : Pseudo<(outs   ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZXI  : Pseudo<(outs  ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+  }
+  let mayStore = 1, hasSideEffects = 0 in {
+    def STR_ZZXI   : Pseudo<(outs), (ins   ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZXI  : Pseudo<(outs), (ins  ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+    def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+  }
+
   def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
             (PTEST_PP PPR:$pg, PPR:$src)>;
   def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),

diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 21bdb45965bd..982d232f12f4 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -8,6 +8,9 @@
 
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
 
   attributes #0 = { nounwind "target-features"="+sve" }
 
@@ -90,3 +93,120 @@ body:             |
     $z0 = COPY %0
     RET_ReallyLR
 ...
+---
+name: spills_fills_stack_id_zpr2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr2 }
+stack:
+liveins:
+  - { reg: '$z0_z1', virtual-reg: '%0' }
+body:             |
+  bb.0.entry:
+    liveins: $z0_z1
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr2
+    ; CHECK: stack:
+    ; CHECK:      - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
+    ; CHECK-NEXT:     stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+
+    %0:zpr2 = COPY $z0_z1
+
+    $z0_z1_z2_z3     = IMPLICIT_DEF
+    $z4_z5_z6_z7     = IMPLICIT_DEF
+    $z8_z9_z10_z11   = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr3
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr3 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2', virtual-reg: '%0' }
+body:             |
+  bb.0.entry:
+    liveins: $z0_z1_z2
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr3
+    ; CHECK: stack:
+    ; CHECK:      - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
+    ; CHECK-NEXT:     stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+
+    %0:zpr3 = COPY $z0_z1_z2
+
+    $z0_z1_z2_z3     = IMPLICIT_DEF
+    $z4_z5_z6_z7     = IMPLICIT_DEF
+    $z8_z9_z10_z11   = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2 = COPY %0
+    RET_ReallyLR
+...
+---
+name: spills_fills_stack_id_zpr4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: zpr4 }
+stack:
+liveins:
+  - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+body:             |
+  bb.0.entry:
+    liveins: $z0_z1_z2_z3
+
+    ; CHECK-LABEL: name: spills_fills_stack_id_zpr4
+    ; CHECK: stack:
+    ; CHECK:      - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
+    ; CHECK-NEXT:     stack-id: sve-vec
+
+    ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
+    ; EXPAND: STR_ZXI $z0, $sp, 0
+    ; EXPAND: STR_ZXI $z1, $sp, 1
+    ; EXPAND: STR_ZXI $z2, $sp, 2
+    ; EXPAND: STR_ZXI $z3, $sp, 3
+    ; EXPAND: $z0 = LDR_ZXI $sp, 0
+    ; EXPAND: $z1 = LDR_ZXI $sp, 1
+    ; EXPAND: $z2 = LDR_ZXI $sp, 2
+    ; EXPAND: $z3 = LDR_ZXI $sp, 3
+
+    %0:zpr4 = COPY $z0_z1_z2_z3
+
+    $z0_z1_z2_z3     = IMPLICIT_DEF
+    $z4_z5_z6_z7     = IMPLICIT_DEF
+    $z8_z9_z10_z11   = IMPLICIT_DEF
+    $z12_z13_z14_z15 = IMPLICIT_DEF
+    $z16_z17_z18_z19 = IMPLICIT_DEF
+    $z20_z21_z22_z23 = IMPLICIT_DEF
+    $z24_z25_z26_z27 = IMPLICIT_DEF
+    $z28_z29_z30_z31 = IMPLICIT_DEF
+
+    $z0_z1_z2_z3 = COPY %0
+    RET_ReallyLR
+...
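
For quick reference, storeRegToStackSlot/loadRegFromStackSlot now pick
the pseudo from the spill size that TRI->getSpillSize reports for the
register class, and tag the slot with the sve-vec stack ID (a summary
of the hunks above; sizes are scalable, in units of the 16-byte
minimum vector length):

  spill size   class   store pseudo   fill pseudo
  16 bytes     ZPR     STR_ZXI        LDR_ZXI
  32 bytes     ZPR2    STR_ZZXI       LDR_ZZXI
  48 bytes     ZPR3    STR_ZZZXI      LDR_ZZZXI
  64 bytes     ZPR4    STR_ZZZZXI     LDR_ZZZZXI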