[llvm] 94b195f - [ARM][LowOverheadLoops] Add horizontal reduction support

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 01:56:09 PDT 2020


Author: Sam Parker
Date: 2020-03-30T09:55:41+01:00
New Revision: 94b195ff1236004a0b74a63803b71d45a8ca496a

URL: https://github.com/llvm/llvm-project/commit/94b195ff1236004a0b74a63803b71d45a8ca496a
DIFF: https://github.com/llvm/llvm-project/commit/94b195ff1236004a0b74a63803b71d45a8ca496a.diff

LOG: [ARM][LowOverheadLoops] Add horizontal reduction support

Add a bit more logic into the 'FalseLaneZeros' tracking to enable
horizontal reductions and also make the VADDV variants
validForTailPredication.

Differential Revision: https://reviews.llvm.org/D76708

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMInstrMVE.td
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
    llvm/unittests/Target/ARM/MachineInstrTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 22374bc9c9ef..2972e4e002c1 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -607,6 +607,7 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
   let horizontalReduction = 1;
+  let validForTailPredication = 1;
 }
 
 def ARMVADDVs       : SDNode<"ARMISD::VADDVs", SDTVecReduce>;

diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index d3d33e47f670..b9572ef3e9d0 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -538,6 +538,12 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
   return (Flags & ARMII::DoubleWidthResult) != 0;
 }
 
+static bool isHorizontalReduction(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+  uint64_t Flags = MCID.TSFlags;
+  return (Flags & ARMII::HorizontalReduction) != 0;
+}
+
 // Can this instruction generate a non-zero result when given only zeroed
 // operands? This allows us to know that, given operands with false bytes
 // zeroed by masked loads, that the result will also contain zeros in those
@@ -569,20 +575,24 @@ static bool canGenerateNonZeros(const MachineInstr &MI) {
 
 // Look at its register uses to see if it only can only receive zeros
 // into its false lanes which would then produce zeros. Also check that
-// the output register is also defined by an FalseLaneZeros instruction
+// the output register is also defined by an FalseLanesZero instruction
 // so that if tail-predication happens, the lanes that aren't updated will
 // still be zeros.
-static bool producesFalseLaneZeros(MachineInstr &MI,
+static bool producesFalseLanesZero(MachineInstr &MI,
                                    const TargetRegisterClass *QPRs,
                                    const ReachingDefAnalysis &RDA,
-                                   InstSet &FalseLaneZeros) {
+                                   InstSet &FalseLanesZero) {
   if (canGenerateNonZeros(MI))
     return false;
+
+  bool AllowScalars = isHorizontalReduction(MI);
   for (auto &MO : MI.operands()) {
     if (!MO.isReg() || !MO.getReg())
       continue;
+    if (!isRegInClass(MO, QPRs) && AllowScalars)
+      continue;
     if (auto *OpDef = RDA.getMIOperand(&MI, MO))
-      if (FalseLaneZeros.count(OpDef))
+      if (FalseLanesZero.count(OpDef))
        continue;
     return false;
   }
@@ -613,8 +623,8 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
   // loads, stores and other predicated instructions into our Predicated
   // set and build from there.
   const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
-  SetVector<MachineInstr *> Unknown;
-  SmallPtrSet<MachineInstr *, 4> FalseLaneZeros;
+  SetVector<MachineInstr *> FalseLanesUnknown;
+  SmallPtrSet<MachineInstr *, 4> FalseLanesZero;
   SmallPtrSet<MachineInstr *, 4> Predicated;
   MachineBasicBlock *MBB = ML.getHeader();
 
@@ -624,9 +634,14 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
     if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
       continue;
 
+    if (isVCTP(&MI) || MI.getOpcode() == ARM::MVE_VPST)
+      continue;
+
+    // Predicated loads will write zeros to the falsely predicated bytes of the
+    // destination register.
     if (isVectorPredicated(&MI)) {
       if (MI.mayLoad())
-        FalseLaneZeros.insert(&MI);
+        FalseLanesZero.insert(&MI);
       Predicated.insert(&MI);
       continue;
     }
@@ -634,12 +649,16 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
     if (MI.getNumDefs() == 0)
       continue;
 
-    if (producesFalseLaneZeros(MI, QPRs, RDA, FalseLaneZeros))
-      FalseLaneZeros.insert(&MI);
-    else if (retainsPreviousHalfElement(MI))
-      return false;
-    else
-      Unknown.insert(&MI);
+    if (!producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) {
+      // We require retaining and horizontal operations to operate upon zero'd
+      // false lanes to ensure the conversion doesn't change the output.
+      if (retainsPreviousHalfElement(MI) || isHorizontalReduction(MI))
+        return false;
+      // Otherwise we need to evaluate this instruction later to see whether
+      // unknown false lanes will get masked away by their user(s).
+      FalseLanesUnknown.insert(&MI);
+    } else if (!isHorizontalReduction(MI))
+      FalseLanesZero.insert(&MI);
   }
 
   auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
@@ -655,8 +674,9 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
 
   // Visit the unknowns in reverse so that we can start at the values being
   // stored and then we can work towards the leaves, hopefully adding more
-  // instructions to Predicated.
-  for (auto *MI : reverse(Unknown)) {
+  // instructions to Predicated. Successfully terminating the loop means that
+  // all the unknown values have to found to be masked by predicated user(s).
+  for (auto *MI : reverse(FalseLanesUnknown)) {
     for (auto &MO : MI->operands()) {
       if (!isRegInClass(MO, QPRs) || !MO.isDef())
         continue;

diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
index 914f74ce6df6..ae1c783c0606 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir
@@ -853,25 +853,14 @@ body:             |
   ; CHECK:   tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2IT 0, 8, implicit-def $itstate
   ; CHECK:   tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   dead $lr = t2DLS renamable $r12
-  ; CHECK:   $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r2
   ; CHECK: bb.1.vector.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2)
+  ; CHECK:   liveins: $lr, $r0, $r1
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2)
   ; CHECK:   renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
-  ; CHECK:   $lr = tMOVr $r3, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
-  ; CHECK:   renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.exit:
   ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -955,25 +944,14 @@ body:             |
   ; CHECK:   tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2IT 0, 8, implicit-def $itstate
   ; CHECK:   tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   dead $lr = t2DLS renamable $r12
-  ; CHECK:   $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_16 killed renamable $r2
   ; CHECK: bb.1.vector.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2)
+  ; CHECK:   liveins: $lr, $r0, $r1
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 2)
   ; CHECK:   renamable $r12 = MVE_VADDVs16no_acc killed renamable $q0, 0, $noreg
-  ; CHECK:   $lr = tMOVr $r3, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
-  ; CHECK:   renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.exit:
   ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -1057,25 +1035,14 @@ body:             |
   ; CHECK:   tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2IT 0, 8, implicit-def $itstate
   ; CHECK:   tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   dead $lr = t2DLS renamable $r12
-  ; CHECK:   $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_8 killed renamable $r2
   ; CHECK: bb.1.vector.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1)
+  ; CHECK:   liveins: $lr, $r0, $r1
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 1)
   ; CHECK:   renamable $r12 = MVE_VADDVs8no_acc killed renamable $q0, 0, $noreg
-  ; CHECK:   $lr = tMOVr $r3, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr)
-  ; CHECK:   renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.exit:
   ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -1159,25 +1126,14 @@ body:             |
   ; CHECK: bb.1.vector.ph:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   liveins: $r0, $r1
-  ; CHECK:   renamable $r2, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   dead $lr = t2DLS renamable $r2
-  ; CHECK:   $r3 = tMOVr killed $r2, 14 /* CC::al */, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r1
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
   ; CHECK: bb.2.vector.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
-  ; CHECK:   $lr = tMOVr $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2)
+  ; CHECK:   liveins: $lr, $r0, $r2
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2)
   ; CHECK:   renamable $r2 = MVE_VADDVu32acc killed renamable $r2, killed renamable $q0, 0, $noreg
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.exit:
   ; CHECK:   liveins: $r2
   ; CHECK:   $r0 = tMOVr killed $r2, 14 /* CC::al */, $noreg
@@ -2705,32 +2661,21 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
-  ; CHECK:   tCMPi8 renamable $r2, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   $r3 = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   t2IT 10, 8, implicit-def $itstate
-  ; CHECK:   renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
   ; CHECK:   tCMPi8 renamable $r2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.4, 11 /* CC::lt */, killed $cpsr
   ; CHECK: bb.1.while.body.preheader:
   ; CHECK:   successors: %bb.2(0x80000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $r3, dead $cpsr = tSUBrr renamable $r2, killed renamable $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r2
   ; CHECK: bb.2.while.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r12
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
-  ; CHECK:   MVE_VPST 4, implicit $vpr
-  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.tmp3, align 2)
-  ; CHECK:   renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.tmp1, align 2)
+  ; CHECK:   liveins: $lr, $r0, $r1, $r12
+  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.tmp3, align 2)
+  ; CHECK:   renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.tmp1, align 2)
   ; CHECK:   renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r12 = MVE_VADDVu32acc killed renamable $r12, killed renamable $q0, 0, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.while.end:
   ; CHECK:   liveins: $r12
   ; CHECK:   $r0 = tMOVr killed $r12, 14 /* CC::al */, $noreg
@@ -2831,33 +2776,22 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
-  ; CHECK:   tCMPi8 renamable $r2, 8, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   $r3 = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   t2IT 10, 8, implicit-def $itstate
-  ; CHECK:   renamable $r3 = tMOVi8 $noreg, 8, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
   ; CHECK:   tCMPi8 renamable $r2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.4, 11 /* CC::lt */, killed $cpsr
   ; CHECK: bb.1.while.body.preheader:
   ; CHECK:   successors: %bb.2(0x80000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $r3, dead $cpsr = tSUBrr renamable $r2, killed renamable $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r3, 7, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_16 killed renamable $r2
   ; CHECK: bb.2.while.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg
-  ; CHECK:   MVE_VPST 4, implicit $vpr
-  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2)
-  ; CHECK:   renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2)
+  ; CHECK:   liveins: $lr, $r0, $r1, $r3
+  ; CHECK:   renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.tmp3, align 2)
+  ; CHECK:   renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.tmp1, align 2)
   ; CHECK:   renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r12 = MVE_VADDVu16no_acc killed renamable $q0, 0, $noreg
   ; CHECK:   renamable $r3 = t2UXTAH killed renamable $r3, killed renamable $r12, 0, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.while.end:
   ; CHECK:   liveins: $r3
   ; CHECK:   $r0 = tMOVr killed $r3, 14 /* CC::al */, $noreg

diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
index 51ae2b6aca68..8d6cb631d692 100644
--- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp
+++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
@@ -407,6 +407,18 @@ TEST(MachineInstrValidTailPredication, IsCorrect) {
     case MVE_VADD_qr_i16:
     case MVE_VADD_qr_i32:
     case MVE_VADD_qr_i8:
+    case MVE_VADDVs16acc:
+    case MVE_VADDVs16no_acc:
+    case MVE_VADDVs32acc:
+    case MVE_VADDVs32no_acc:
+    case MVE_VADDVs8acc:
+    case MVE_VADDVs8no_acc:
+    case MVE_VADDVu16acc:
+    case MVE_VADDVu16no_acc:
+    case MVE_VADDVu32acc:
+    case MVE_VADDVu32no_acc:
+    case MVE_VADDVu8acc:
+    case MVE_VADDVu8no_acc:
     case MVE_VADDf16:
     case MVE_VADDf32:
     case MVE_VADDi16:


        


More information about the llvm-commits mailing list