[llvm] 73346f5 - [ARM] Introduce a MQPRCopy

David Green via llvm-commits <llvm-commits at lists.llvm.org>
Thu Oct 7 04:52:17 PDT 2021


Author: David Green
Date: 2021-10-07T12:52:12+01:00
New Revision: 73346f58486d29c0d2687af1208fa146168d62d8

URL: https://github.com/llvm/llvm-project/commit/73346f58486d29c0d2687af1208fa146168d62d8
DIFF: https://github.com/llvm/llvm-project/commit/73346f58486d29c0d2687af1208fa146168d62d8.diff

LOG: [ARM] Introduce a MQPRCopy

Currently, when creating tail-predicated loops, we need to validate that
all the live-outs of a loop will be equivalent with and without tail
predication; if they are not, we cannot legally create a tail-predicated
loop and are left with expensive vctp and vpst instructions in the loop.
The offending instructions notably include those inserted by register
allocation, such as stack loads and stores, and the MVE_VORRs that
COPYs are lowered to.
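For example (a hand-written sketch in the spirit of the tests below, not
compiler output, with registers picked for illustration): once a loop has
been converted to use dlstp/letp tail predication, every MVE instruction
in its body, including an MVE_VORR acting as a copy, is implicitly
predicated on the remaining element count:

    dlstp.16 lr, r2        @ start tail-predicated loop over r2 elements
.LBB0_1:
    vldrh.u16 q1, [r4]
    vorr q2, q0, q0        @ implicitly predicated: the inactive lanes of
                           @ q2 keep their old value instead of q0's
    ...
    letp lr, .LBB0_1       @ loop end, decrements the element count

If q2 is live out of the loop, its inactive lanes can therefore differ
from what the non-tail-predicated loop would have produced, and the
validation has to reject the transform.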

Instead of trying to prove this is valid late in the pipeline, this
patch introduces an MQPRCopy pseudo instruction that COPY is lowered to.
This can then either be converted to an MVE_VORR where possible, or to a
pair of VMOVD instructions if not. This way the copies do not behave
differently inside and outside of tail-predicated regions, and we know
by construction that they are always valid. The idea is that we can do
the same with stack loads and stores, converting them to VLDR/VSTR or
VLDM/VSTM as required, to prove that tail predication is always valid.
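As a rough sketch (again with register numbers chosen for illustration),
an "MQPRCopy q2, q0" then ends up as one of:

    @ where an unpredicated MVE_VORR is known to be valid:
    vorr q2, q0, q0
    @ otherwise, as a whole-register move of each half of the Q register
    @ (q2 = d4:d5, q0 = d0:d1), which is unaffected by tail predication:
    vmov.f64 d4, d0
    vmov.f64 d5, d1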

This does unfortunately mean inserting two VMOVD instructions in place
of a single MVE_VORR, but my experiments show it to be an improvement
in general.

Differential Revision: https://reviews.llvm.org/D111048

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/lib/Target/ARM/ARMInstrMVE.td
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
    llvm/unittests/Target/ARM/MachineInstrTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c338c1e778446..b2363ee2ef57e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -916,7 +916,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
-    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
 
   if (Opc) {
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
@@ -925,7 +925,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       MIB.addReg(SrcReg, getKillRegState(KillSrc));
     if (Opc == ARM::MVE_VORR)
       addUnpredicatedMveVpredROp(MIB, DestReg);
-    else
+    else if (Opc != ARM::MQPRCopy)
       MIB.add(predOps(ARMCC::AL));
     return;
   }

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index c0aad6631b525..6977300372778 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -6975,6 +6975,17 @@ let mayLoad = 1, hasSideEffects = 0 in {
 }
 }
 
+// Pseudo for lowering MVE Q register COPYs. These will usually get converted
+// to a "MVE_VORR dst, src, src", but may behave differently in tail predicated
+// loops to ensure the whole register is copied, not a subset from a
+// tail-predicated MVE_VORR. In the event we cannot prove a MVE_VORR is valid,
+// it will become a pair of VMOVD instructions for each half of the Q register.
+let Predicates = [HasMVEInt], hasSideEffects = 0, isMoveReg = 1,
+    D = MVEDomain in {
+  def MQPRCopy : t2PseudoInst<(outs MQPR:$dst), (ins MQPR:$src),
+                              8, NoItinerary, []>;
+}
+
 
 //===----------------------------------------------------------------------===//
 // Patterns

diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 23e87e1790b33..099afe3926eca 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -376,10 +376,11 @@ namespace {
     MachineInstr *Dec = nullptr;
     MachineInstr *End = nullptr;
     MachineOperand TPNumElements;
-    SmallVector<MachineInstr*, 4> VCTPs;
-    SmallPtrSet<MachineInstr*, 4> ToRemove;
-    SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
-    SmallPtrSet<MachineInstr*, 4> DoubleWidthResultInstrs;
+    SmallVector<MachineInstr *, 4> VCTPs;
+    SmallPtrSet<MachineInstr *, 4> ToRemove;
+    SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
+    SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;
+    SmallPtrSet<MachineInstr *, 4> VMOVCopies;
     bool Revert = false;
     bool CannotTailPredicate = false;
 
@@ -976,8 +977,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
     else if (!isPredicated && retainsOrReduces) {
       LLVM_DEBUG(dbgs() << "  Unpredicated instruction that retainsOrReduces: " << MI);
       return false;
-    }
-    else if (!isPredicated)
+    } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy)
       FalseLanesUnknown.insert(&MI);
   }
 
@@ -1052,10 +1052,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
   // any VPT predicated instruction is predicated upon VCTP. Any live-out
   // instruction needs to be predicated, so check this here. The instructions
   // in NonPredicated have been found to be a reduction that we can ensure its
-  // legality.
-  for (auto *MI : LiveOutMIs) {
-    if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
-      LLVM_DEBUG(dbgs() << "  Unable to handle live out: " << *MI);
+  // legality. Any MQPRCopy found will need to validate its input as if it was
+  // live out.
+  SmallVector<MachineInstr *> Worklist(LiveOutMIs.begin(), LiveOutMIs.end());
+  while (!Worklist.empty()) {
+    MachineInstr *MI = Worklist.pop_back_val();
+    if (MI->getOpcode() == ARM::MQPRCopy) {
+      VMOVCopies.insert(MI);
+      MachineInstr *CopySrc =
+          RDA.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
+      if (CopySrc)
+        Worklist.push_back(CopySrc);
+    } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
+      LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);
+      VMOVCopies.clear();
       return false;
     }
   }
@@ -1256,6 +1266,8 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
   bool RequiresExplicitPredication =
     (MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
   if (isDomainMVE(MI) && RequiresExplicitPredication) {
+    if (MI->getOpcode() == ARM::MQPRCopy)
+      return true;
     if (!IsUse && producesDoubleWidthResult(*MI)) {
       DoubleWidthResultInstrs.insert(MI);
       return true;
@@ -1739,6 +1751,29 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
     }
   };
 
+  // And VMOVCopies need to become 2xVMOVD for tail predication to be valid.
+  // Any other MQPRCopy can be converted to MVE_VORR later on.
+  auto ExpandVMOVCopies = [this](SmallPtrSet<MachineInstr *, 4> &VMOVCopies) {
+    for (auto *MI : VMOVCopies) {
+      LLVM_DEBUG(dbgs() << "Converting copy to VMOVD: " << *MI);
+      assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+      MachineBasicBlock *MBB = MI->getParent();
+      Register Dst = MI->getOperand(0).getReg();
+      Register Src = MI->getOperand(1).getReg();
+      auto MIB1 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+                          ARM::D0 + (Dst - ARM::Q0) * 2)
+                      .addReg(ARM::D0 + (Src - ARM::Q0) * 2)
+                      .add(predOps(ARMCC::AL));
+      LLVM_DEBUG(dbgs() << " into " << *MIB1);
+      auto MIB2 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+                          ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
+                      .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
+                      .add(predOps(ARMCC::AL));
+      LLVM_DEBUG(dbgs() << " and  " << *MIB2);
+      MI->eraseFromParent();
+    }
+  };
+
   if (LoLoop.Revert) {
     if (isWhileLoopStart(*LoLoop.Start))
       RevertWhile(LoLoop.Start);
@@ -1749,6 +1784,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
     else
       RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
   } else {
+    ExpandVMOVCopies(LoLoop.VMOVCopies);
     LoLoop.Start = ExpandLoopStart(LoLoop);
     if (LoLoop.Start)
       RemoveDeadBranch(LoLoop.Start);
@@ -1793,6 +1829,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
     SmallVector<MachineInstr*, 4> Decs;
     SmallVector<MachineInstr*, 4> Ends;
     SmallVector<MachineInstr *, 4> EndDecs;
+    SmallVector<MachineInstr *, 4> MQPRCopies;
 
     for (auto &I : MBB) {
       if (isLoopStart(I))
@@ -1803,9 +1840,12 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
         Ends.push_back(&I);
       else if (I.getOpcode() == ARM::t2LoopEndDec)
         EndDecs.push_back(&I);
+      else if (I.getOpcode() == ARM::MQPRCopy)
+        MQPRCopies.push_back(&I);
     }
 
-    if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
+    if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty() &&
+        MQPRCopies.empty())
       continue;
 
     Changed = true;
@@ -1823,6 +1863,17 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
       RevertLoopEnd(End);
     for (auto *End : EndDecs)
       RevertLoopEndDec(End);
+    for (auto *MI : MQPRCopies) {
+      LLVM_DEBUG(dbgs() << "Converting copy to VORR: " << *MI);
+      assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+      MachineBasicBlock *MBB = MI->getParent();
+      auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::MVE_VORR),
+                         MI->getOperand(0).getReg())
+                     .add(MI->getOperand(1))
+                     .add(MI->getOperand(1));
+      addUnpredicatedMveVpredROp(MIB, MI->getOperand(0).getReg());
+      MI->eraseFromParent();
+    }
   }
   return Changed;
 }

diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
index c4c69fcaf05b9..6d8ba975ac919 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -207,21 +207,16 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
 ; CHECK-NEXT:    uxtb r6, r6
 ; CHECK-NEXT:    movs r5, #120
 ; CHECK-NEXT:    mul lr, r4, r7
-; CHECK-NEXT:    adds r4, r2, #7
 ; CHECK-NEXT:    and.w r5, r5, r3, lsr #9
 ; CHECK-NEXT:    muls r6, r7, r6
-; CHECK-NEXT:    bic r4, r4, #7
 ; CHECK-NEXT:    vmov.i16 q0, #0x78
 ; CHECK-NEXT:    rsb.w r3, r7, #256
 ; CHECK-NEXT:    muls r5, r7, r5
 ; CHECK-NEXT:    lsls r7, r1, #1
-; CHECK-NEXT:    sub.w r1, r4, #8
-; CHECK-NEXT:    movs r4, #1
 ; CHECK-NEXT:    vstrw.32 q0, [sp, #48] @ 16-byte Spill
 ; CHECK-NEXT:    vdup.16 q4, r6
 ; CHECK-NEXT:    mov.w r6, #2016
 ; CHECK-NEXT:    vdup.16 q0, lr
-; CHECK-NEXT:    add.w r1, r4, r1, lsr #3
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    vmov.i16 q2, #0xf8
 ; CHECK-NEXT:    vmov.i16 q5, #0xfc
@@ -236,29 +231,28 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-NEXT:    @ Child Loop BB1_4 Depth 2
 ; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r6, r2
-; CHECK-NEXT:    dls lr, r1
+; CHECK-NEXT:    dlstp.16 lr, r2
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  .LBB1_4: @ %vector.body
 ; CHECK-NEXT:    @ Parent Loop BB1_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vctp.16 r6
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrht.u16 q0, [r5]
+; CHECK-NEXT:    vldrh.u16 q0, [r5]
 ; CHECK-NEXT:    vshl.i16 q1, q0, #3
 ; CHECK-NEXT:    vand q1, q1, q2
-; CHECK-NEXT:    vmov q3, q2
+; CHECK-NEXT:    vmov.f64 d6, d4
+; CHECK-NEXT:    vmov.f64 d7, d5
 ; CHECK-NEXT:    vmov q2, q4
 ; CHECK-NEXT:    vmla.u16 q2, q1, r3
 ; CHECK-NEXT:    vshr.u16 q1, q0, #3
 ; CHECK-NEXT:    vand q1, q1, q5
-; CHECK-NEXT:    vmov q7, q5
-; CHECK-NEXT:    vmov q5, q4
+; CHECK-NEXT:    vmov.f64 d14, d10
+; CHECK-NEXT:    vmov.f64 d15, d11
+; CHECK-NEXT:    vmov.f64 d10, d8
+; CHECK-NEXT:    vmov.f64 d11, d9
 ; CHECK-NEXT:    vldrw.u32 q4, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT:    vshr.u16 q0, q0, #9
 ; CHECK-NEXT:    vmla.u16 q4, q1, r3
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #48] @ 16-byte Reload
-; CHECK-NEXT:    subs r6, #8
 ; CHECK-NEXT:    vand q0, q0, q1
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
 ; CHECK-NEXT:    vmla.u16 q1, q0, r3
@@ -266,15 +260,17 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
 ; CHECK-NEXT:    vshr.u16 q2, q4, #5
 ; CHECK-NEXT:    vand q2, q2, q6
 ; CHECK-NEXT:    vorr q0, q2, q0
-; CHECK-NEXT:    vmov q2, q3
+; CHECK-NEXT:    vmov.f64 d4, d6
+; CHECK-NEXT:    vmov.f64 d5, d7
 ; CHECK-NEXT:    vldrw.u32 q3, [sp] @ 16-byte Reload
-; CHECK-NEXT:    vmov q4, q5
-; CHECK-NEXT:    vmov q5, q7
+; CHECK-NEXT:    vmov.f64 d8, d10
+; CHECK-NEXT:    vmov.f64 d9, d11
+; CHECK-NEXT:    vmov.f64 d10, d14
+; CHECK-NEXT:    vmov.f64 d11, d15
 ; CHECK-NEXT:    vand q1, q1, q3
 ; CHECK-NEXT:    vorr q0, q0, q1
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrht.16 q0, [r5], #16
-; CHECK-NEXT:    le lr, .LBB1_4
+; CHECK-NEXT:    vstrh.16 q0, [r5], #16
+; CHECK-NEXT:    letp lr, .LBB1_4
 ; CHECK-NEXT:  @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
 ; CHECK-NEXT:    @ in Loop: Header=BB1_3 Depth=1
 ; CHECK-NEXT:    adds r4, #1

diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
index 14eb0e1f57752..c376816d6706b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
@@ -193,7 +193,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r6, $r12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 0, $noreg, renamable $lr
-  ; CHECK-NEXT:   $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2
+  ; CHECK-NEXT:   $q2 = MVE_VORR $q0, $q0, 0, $noreg, $noreg, undef $q2
   ; CHECK-NEXT:   renamable $q1 = MVE_VAND killed renamable $q1, killed renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
   ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 0, killed $noreg, renamable $lr
   ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.3
@@ -257,7 +257,7 @@ body:             |
     renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
     MVE_VPST 8, implicit $vpr
     renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
-    $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2
+    $q2 = MQPRCopy $q0
     renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
     MVE_VPST 8, implicit $vpr
     renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
@@ -309,38 +309,30 @@ body:             |
   ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK-NEXT:   renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
   ; CHECK-NEXT:   renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0
-  ; CHECK-NEXT:   renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT:   liveins: $d0, $d1, $r0, $r1, $r2, $r3, $r6, $r12
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   $r5 = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   $lr = t2DLS renamable $r6
+  ; CHECK-NEXT:   $lr = MVE_DLSTP_16 renamable $r2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12
+  ; CHECK-NEXT:   liveins: $lr, $d0, $d1, $r0, $r1, $r2, $r3, $r4, $r6, $r12
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg
-  ; CHECK-NEXT:   renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
-  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
-  ; CHECK-NEXT:   $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, renamable $lr, undef renamable $q2
+  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 0, $noreg, renamable $lr
+  ; CHECK-NEXT:   $d4 = VMOVD killed $d0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $d5 = VMOVD killed $d1, 14 /* CC::al */, $noreg
   ; CHECK-NEXT:   renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
-  ; CHECK-NEXT:   $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, renamable $lr, undef renamable $q0
-  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
-  ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
-  ; CHECK-NEXT:   $lr = t2LEUpdate killed renamable $lr, %bb.3
+  ; CHECK-NEXT:   $d0 = VMOVD killed $d4, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $d1 = VMOVD killed $d5, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 0, killed $noreg, renamable $lr
+  ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
   ; CHECK-NEXT:   successors: %bb.5(0x04000000), %bb.2(0x7c000000)
@@ -401,9 +393,9 @@ body:             |
     renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
     MVE_VPST 8, implicit $vpr
     renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
-    $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2
+    $q2 = MQPRCopy $q0
     renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
-    $q0 = MVE_VORR $q2, $q2, 0, $noreg, renamable $lr, undef renamable $q0
+    $q0 = MQPRCopy $q2
     MVE_VPST 8, implicit $vpr
     renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
     renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr

diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
index 4113b79b07852..67b778b34c01a 100644
--- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp
+++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
@@ -1417,6 +1417,7 @@ TEST(MachineInstr, MVEVecSize) {
     case MVE_VPNOT:
     case MVE_VPSEL:
     case MVE_VPST:
+    case MQPRCopy:
       return 0;
     case MVE_VABAVs16:
     case MVE_VABAVu16:
@@ -2098,4 +2099,4 @@ TEST(MachineInstr, MVEVecSize) {
               << MII->getName(i)
               << ": mismatched expectation for MVE vec size\n";
   }
-}
\ No newline at end of file
+}
