[llvm] 45ec3a3 - [PowerPC] Fix for excessive ACC copies due to PHI nodes

Baptiste Saleil via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 3 07:52:12 PST 2020


Author: Baptiste Saleil
Date: 2020-12-03T09:51:23-06:00
New Revision: 45ec3a37b0a54e34e8f47cdac2be495838f93675

URL: https://github.com/llvm/llvm-project/commit/45ec3a37b0a54e34e8f47cdac2be495838f93675
DIFF: https://github.com/llvm/llvm-project/commit/45ec3a37b0a54e34e8f47cdac2be495838f93675.diff

LOG: [PowerPC] Fix for excessive ACC copies due to PHI nodes

When accumulators are used in loops, they are passed around in PHI nodes on
unprimed accumulators, causing the generation of additional prime/unprime
instructions. This patch detects such cases and changes the PHI nodes into
primed accumulator PHI nodes. We also add IR and MIR tests covering several
PHI node patterns.
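
For context, here is a minimal sketch (not part of this patch) of the kind of
MMA source loop that produces such PHI nodes; the function name and shape are
illustrative only, in the spirit of the testPHI1 test added below:

  // Build with clang at -O3 targeting pwr10 (assumed flags). The accumulator
  // value flows around the loop back-edge, so at the machine level it is
  // carried by a PHI node; before this patch that PHI was on the unprimed
  // accumulator register class, forcing extra prime/unprime copies.
  typedef vector unsigned char vec_t;

  void accumulate(vec_t *Dst, const vec_t *Src, int Len) {
    __vector_quad Acc;
    __builtin_mma_xxsetaccz(&Acc);                    // zero the accumulator
    for (int I = 0; I < Len; ++I)                     // Acc is PHI-carried here
      __builtin_mma_xvf32gerpp(&Acc, Src[I], Src[I]); // rank-1 update
    __builtin_mma_disassemble_acc(Dst, &Acc);         // write out four vectors
  }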

Differential Revision: https://reviews.llvm.org/D91391

Added: 
    llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
    llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir

Modified: 
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 827d3c4693b9..633f216388d0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves(
   TOCSaves[MI] = Keep;
 }
 
+// This function returns a list of all PHI nodes in the tree starting from
+// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
+// The list initially contains only the root PHI. When we visit a PHI node, we
+// append its PHI node operands to the list and keep going while there are
+// still nodes left to visit. The function returns false if the optimization
+// cannot be applied to this tree.
+static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
+                                   MachineInstr *RootPHI,
+                                   SmallVectorImpl<MachineInstr *> &PHIs) {
+  PHIs.push_back(RootPHI);
+  unsigned VisitedIndex = 0;
+  while (VisitedIndex < PHIs.size()) {
+    MachineInstr *VisitedPHI = PHIs[VisitedIndex];
+    for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
+         PHIOp != NumOps; PHIOp += 2) {
+      Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
+      if (!Register::isVirtualRegister(RegOp))
+        return false;
+      MachineInstr *Instr = MRI->getVRegDef(RegOp);
+      // While collecting the PHI nodes, we check if they can be converted (i.e.
+      // all the operands are either copies, implicit defs or PHI nodes).
+      unsigned Opcode = Instr->getOpcode();
+      if (Opcode == PPC::COPY) {
+        Register Reg = Instr->getOperand(1).getReg();
+        if (!Register::isVirtualRegister(Reg) ||
+            MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
+          return false;
+      } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
+        return false;
+      // If we detect a cycle in the PHI nodes, we exit. It would be
+      // possible to change cycles as well, but that would add a lot
+      // of complexity for a case that is unlikely to occur with MMA
+      // code.
+      if (Opcode != PPC::PHI)
+        continue;
+      if (std::find(PHIs.begin(), PHIs.end(), Instr) != PHIs.end())
+        return false;
+      PHIs.push_back(Instr);
+    }
+    VisitedIndex++;
+  }
+  return true;
+}
+
+// This function changes the unprimed accumulator PHI nodes in the PHIs list to
+// primed accumulator PHI nodes. The list is traversed in reverse order to
+// change all the PHI operands of a PHI node before changing the node itself.
+// We keep a map from each original PHI node to its converted form.
+static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
+                                   MachineRegisterInfo *MRI,
+                                   SmallVectorImpl<MachineInstr *> &PHIs,
+                                   Register Dst) {
+  DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
+  for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
+    MachineInstr *PHI = *It;
+    SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
+    // We check if the current PHI node can be changed by looking at its
+    // operands. If all the operands are either copies from primed
+    // accumulators, implicit definitions or other unprimed accumulator
+    // PHI nodes, we change it.
+    for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
+         PHIOp += 2) {
+      Register RegOp = PHI->getOperand(PHIOp).getReg();
+      MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
+      unsigned Opcode = PHIInput->getOpcode();
+      assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
+              Opcode == PPC::PHI) &&
+             "Unexpected instruction");
+      if (Opcode == PPC::COPY) {
+        assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
+                   &PPC::ACCRCRegClass &&
+               "Unexpected register class");
+        PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
+      } else if (Opcode == PPC::IMPLICIT_DEF) {
+        Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+        BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
+                TII->get(PPC::IMPLICIT_DEF), AccReg);
+        PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
+                          PHI->getOperand(PHIOp + 1)});
+      } else if (Opcode == PPC::PHI) {
+        // We found a PHI operand. At this point we know this operand
+        // has already been changed so we get its associated changed form
+        // from the map.
+        assert(ChangedPHIMap.count(PHIInput) == 1 &&
+               "This PHI node should have already been changed.");
+        MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
+        PHIOps.push_back({MachineOperand::CreateReg(
+                              PrimedAccPHI->getOperand(0).getReg(), false),
+                          PHI->getOperand(PHIOp + 1)});
+      }
+    }
+    Register AccReg = Dst;
+    // If the PHI node we are changing is the root node, the register it defines
+    // will be the destination register of the original copy (of the PHI def).
+    // For all other PHIs in the list, we need to create another primed
+    // accumulator virtual register as the PHI will no longer define the
+    // unprimed accumulator.
+    if (PHI != PHIs[0])
+      AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
+    MachineInstrBuilder NewPHI = BuildMI(
+        *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
+    for (auto RegMBB : PHIOps)
+      NewPHI.add(RegMBB.first).add(RegMBB.second);
+    ChangedPHIMap[PHI] = NewPHI.getInstr();
+  }
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode(void) {
   bool Simplified = false;
@@ -321,6 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) {
 
       default:
         break;
+      case PPC::COPY: {
+        Register Src = MI.getOperand(1).getReg();
+        Register Dst = MI.getOperand(0).getReg();
+        if (!Register::isVirtualRegister(Src) ||
+            !Register::isVirtualRegister(Dst))
+          break;
+        if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
+            MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
+          break;
+
+        // We are copying an unprimed accumulator to a primed accumulator.
+        // If the input to the copy is a PHI that is fed only by (i) copies in
+        // the other direction, (ii) implicitly defined unprimed accumulators, or
+        // (iii) other PHI nodes satisfying (i) and (ii), we can change
+        // the PHI to a PHI on primed accumulators (as long as we also change
+        // its operands). To detect and change such copies, we first get a list
+        // of all the PHI nodes starting from the root PHI node in BFS order.
+        // We then visit all these PHI nodes to check if they can be changed to
+        // primed accumulator PHI nodes and if so, we change them.
+        MachineInstr *RootPHI = MRI->getVRegDef(Src);
+        if (RootPHI->getOpcode() != PPC::PHI)
+          break;
+
+        SmallVector<MachineInstr *, 4> PHIs;
+        if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
+          break;
+
+        convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
+
+        ToErase = &MI;
+        break;
+      }
       case PPC::LI:
       case PPC::LI8: {
         // If we are materializing a zero, look for any use operands for which

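As an illustration (not part of this patch), the worklist pattern used by
collectUnprimedAccPHIs above can be distilled into the following
self-contained C++ sketch, in which the output vector doubles as both the
BFS queue and the visited set, and a linear scan stands in for the cycle
check:

  #include <algorithm>
  #include <vector>

  struct Node {
    std::vector<Node *> Ops; // stands in for the PHI register operands
  };

  // Returns false on any re-visit (conservatively treated as a cycle),
  // mirroring the bail-out in the patch; otherwise Out ends up holding
  // all reachable nodes in BFS order.
  static bool collectBFS(Node *Root, std::vector<Node *> &Out) {
    Out.push_back(Root);
    for (unsigned Visited = 0; Visited < Out.size(); ++Visited)
      for (Node *Op : Out[Visited]->Ops) {
        if (std::find(Out.begin(), Out.end(), Op) != Out.end())
          return false; // already collected: give up on this tree
        Out.push_back(Op);
      }
    return true;
  }

The conversion step (convertUnprimedAccPHIs) then walks the collected list in
reverse so that every PHI operand is rewritten before any PHI that uses it,
memoizing each converted node in a map.
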
diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
new file mode 100644
index 000000000000..45a0c68df520
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
+declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
+declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
+define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
+; CHECK-LABEL: testPHI1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpwi r5, 3
+; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    blt cr0, .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    clrldi r6, r5, 32
+; CHECK-NEXT:    addi r5, r4, 32
+; CHECK-NEXT:    addi r6, r6, -2
+; CHECK-NEXT:    lxv vs4, 0(r4)
+; CHECK-NEXT:    lxv vs5, 16(r4)
+; CHECK-NEXT:    mtctr r6
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lxv vs6, 0(r5)
+; CHECK-NEXT:    addi r5, r5, 16
+; CHECK-NEXT:    xvf64gerpp acc0, vsp4, vs6
+; CHECK-NEXT:    bdnz .LBB0_2
+; CHECK-NEXT:  .LBB0_3: # %for.cond.cleanup
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testPHI1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    cmpwi r5, 3
+; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    blt cr0, .LBB0_3
+; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-BE-NEXT:    clrldi r6, r5, 32
+; CHECK-BE-NEXT:    addi r5, r4, 32
+; CHECK-BE-NEXT:    addi r6, r6, -2
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    mtctr r6
+; CHECK-BE-NEXT:    .p2align 4
+; CHECK-BE-NEXT:  .LBB0_2: # %for.body
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    lxv vs6, 0(r5)
+; CHECK-BE-NEXT:    addi r5, r5, 16
+; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp4, vs6
+; CHECK-BE-NEXT:    bdnz .LBB0_2
+; CHECK-BE-NEXT:  .LBB0_3: # %for.cond.cleanup
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <16 x i8>, <16 x i8>* %Src, align 16
+  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
+  %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
+  %2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
+  %3 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+  %cmp11 = icmp sgt i32 %Len, 2
+  br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %Len to i64
+  br label %for.body
+
+for.cond.cleanup:
+  %Acc.0.lcssa = phi <512 x i1> [ %3, %entry ], [ %13, %for.body ]
+  %4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %Acc.0.lcssa)
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 0
+  store <16 x i8> %5, <16 x i8>* %Dst, align 16
+  %6 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 1
+  %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 1
+  store <16 x i8> %6, <16 x i8>* %7, align 16
+  %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 2
+  %9 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 2
+  store <16 x i8> %8, <16 x i8>* %9, align 16
+  %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %4, 3
+  %11 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 3
+  store <16 x i8> %10, <16 x i8>* %11, align 16
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 2, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %Acc.012 = phi <512 x i1> [ %3, %for.body.preheader ], [ %13, %for.body ]
+  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 %indvars.iv
+  %12 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16
+  %13 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %Acc.012, <256 x i1> %2, <16 x i8> %12)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>)
+define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
+; CHECK-LABEL: testPHI2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv vs4, 0(r4)
+; CHECK-NEXT:    lxv vs5, 16(r4)
+; CHECK-NEXT:    lxv vs6, 32(r4)
+; CHECK-NEXT:    cmpwi r5, 4
+; CHECK-NEXT:    xvf64ger acc0, vsp4, vs6
+; CHECK-NEXT:    blt cr0, .LBB1_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    addi r4, r4, 48
+; CHECK-NEXT:    clrldi r5, r5, 32
+; CHECK-NEXT:    addi r5, r5, -3
+; CHECK-NEXT:    mtctr r5
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lxv vs6, 0(r4)
+; CHECK-NEXT:    addi r4, r4, 16
+; CHECK-NEXT:    xvf64gerpp acc0, vsp4, vs6
+; CHECK-NEXT:    bdnz .LBB1_2
+; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs3, 0(r3)
+; CHECK-NEXT:    stxv vs2, 16(r3)
+; CHECK-NEXT:    stxv vs1, 32(r3)
+; CHECK-NEXT:    stxv vs0, 48(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testPHI2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    lxv vs6, 32(r4)
+; CHECK-BE-NEXT:    cmpwi r5, 4
+; CHECK-BE-NEXT:    xvf64ger acc0, vsp4, vs6
+; CHECK-BE-NEXT:    blt cr0, .LBB1_3
+; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-BE-NEXT:    addi r4, r4, 48
+; CHECK-BE-NEXT:    clrldi r5, r5, 32
+; CHECK-BE-NEXT:    addi r5, r5, -3
+; CHECK-BE-NEXT:    mtctr r5
+; CHECK-BE-NEXT:    .p2align 4
+; CHECK-BE-NEXT:  .LBB1_2: # %for.body
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    lxv vs6, 0(r4)
+; CHECK-BE-NEXT:    addi r4, r4, 16
+; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp4, vs6
+; CHECK-BE-NEXT:    bdnz .LBB1_2
+; CHECK-BE-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <16 x i8>, <16 x i8>* %Src, align 16
+  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 1
+  %1 = load <16 x i8>, <16 x i8>* %arrayidx1, align 16
+  %2 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %0, <16 x i8> %1)
+  %arrayidx2 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 2
+  %3 = load <16 x i8>, <16 x i8>* %arrayidx2, align 16
+  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %2, <16 x i8> %3)
+  %cmp14 = icmp sgt i32 %Len, 3
+  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %Len to i64
+  br label %for.body
+
+for.cond.cleanup:
+  %Acc.0.lcssa = phi <512 x i1> [ %4, %entry ], [ %14, %for.body ]
+  %5 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %Acc.0.lcssa)
+  %6 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 0
+  store <16 x i8> %6, <16 x i8>* %Dst, align 16
+  %7 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 1
+  %8 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 1
+  store <16 x i8> %7, <16 x i8>* %8, align 16
+  %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 2
+  %10 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 2
+  store <16 x i8> %9, <16 x i8>* %10, align 16
+  %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %5, 3
+  %12 = getelementptr inbounds <16 x i8>, <16 x i8>* %Dst, i64 3
+  store <16 x i8> %11, <16 x i8>* %12, align 16
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 3, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %Acc.015 = phi <512 x i1> [ %4, %for.body.preheader ], [ %14, %for.body ]
+  %arrayidx3 = getelementptr inbounds <16 x i8>, <16 x i8>* %Src, i64 %indvars.iv
+  %13 = load <16 x i8>, <16 x i8>* %arrayidx3, align 16
+  %14 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %Acc.015, <256 x i1> %2, <16 x i8> %13)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; This test uses an unprimed accumulator PHI node with two operands: an
+; implicitly defined unprimed accumulator and the unprimed result of the call
+; to xvf64gerpp. The compiler should replace this PHI node by a primed
+; accumulator PHI node.
+define void @testImplicitDef(<16 x i8>* %ptr) {
+; CHECK-LABEL: testImplicitDef:
+; CHECK:       # %bb.0: # %label1
+; CHECK-NEXT:    # implicit-def: $acc0
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %label2
+; CHECK-NEXT:    xvf64gerpp acc0, vsp0, vs0
+; CHECK-NEXT:  .LBB2_2: # %label3
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 0(r3)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testImplicitDef:
+; CHECK-BE:       # %bb.0: # %label1
+; CHECK-BE-NEXT:    # implicit-def: $acc0
+; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB2_2
+; CHECK-BE-NEXT:  # %bb.1: # %label2
+; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp0, vs0
+; CHECK-BE-NEXT:  .LBB2_2: # %label3
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    blr
+label1:
+  br i1 undef, label %label3, label %label2
+
+label2:
+  %0 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> undef, <256 x i1> undef, <16 x i8> undef)
+  br label %label3
+
+label3:
+  %1 = phi <512 x i1> [ undef, %label1 ], [ %0, %label2 ]
+  %2 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %1)
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %2, 3
+  store <16 x i8> %3, <16 x i8>* %ptr, align 16
+  ret void
+}
+
+; This test uses an unprimed accumulator PHI node with an unprimed accumulator
+; PHI node operand. The compiler should replace these PHI nodes by primed
+; accumulator PHI nodes.
+declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
+define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %count, <512 x i1>* nocapture %ptr, <16 x i8> %vc) {
+; CHECK-LABEL: testNestedPHI:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beq cr0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    xvf32gernp acc0, v2, v2
+; CHECK-NEXT:    cmpwi r4, 1
+; CHECK-NEXT:    bge cr0, .LBB3_3
+; CHECK-NEXT:    b .LBB3_5
+; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    # implicit-def: $acc0
+; CHECK-NEXT:    cmpwi r4, 1
+; CHECK-NEXT:    blt cr0, .LBB3_5
+; CHECK-NEXT:  .LBB3_3: # %for.body.preheader
+; CHECK-NEXT:    clrldi r3, r4, 32
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB3_4: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    xvf32gernp acc0, v2, v2
+; CHECK-NEXT:    bdnz .LBB3_4
+; CHECK-NEXT:  .LBB3_5: # %for.cond.cleanup
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    stxv vs0, 48(r5)
+; CHECK-NEXT:    stxv vs1, 32(r5)
+; CHECK-NEXT:    stxv vs2, 16(r5)
+; CHECK-NEXT:    stxv vs3, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testNestedPHI:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    cmplwi r3, 0
+; CHECK-BE-NEXT:    beq cr0, .LBB3_2
+; CHECK-BE-NEXT:  # %bb.1: # %if.then
+; CHECK-BE-NEXT:    xvf32gernp acc0, v2, v2
+; CHECK-BE-NEXT:    cmpwi r4, 1
+; CHECK-BE-NEXT:    bge cr0, .LBB3_3
+; CHECK-BE-NEXT:    b .LBB3_5
+; CHECK-BE-NEXT:  .LBB3_2:
+; CHECK-BE-NEXT:    # implicit-def: $acc0
+; CHECK-BE-NEXT:    cmpwi r4, 1
+; CHECK-BE-NEXT:    blt cr0, .LBB3_5
+; CHECK-BE-NEXT:  .LBB3_3: # %for.body.preheader
+; CHECK-BE-NEXT:    clrldi r3, r4, 32
+; CHECK-BE-NEXT:    mtctr r3
+; CHECK-BE-NEXT:    .p2align 4
+; CHECK-BE-NEXT:  .LBB3_4: # %for.body
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    xvf32gernp acc0, v2, v2
+; CHECK-BE-NEXT:    bdnz .LBB3_4
+; CHECK-BE-NEXT:  .LBB3_5: # %for.cond.cleanup
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    stxv vs1, 16(r5)
+; CHECK-BE-NEXT:    stxv vs0, 0(r5)
+; CHECK-BE-NEXT:    stxv vs3, 48(r5)
+; CHECK-BE-NEXT:    stxv vs2, 32(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %tobool.not = icmp eq i32 %cond, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+  %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> undef, <16 x i8> %vc, <16 x i8> %vc)
+  br label %if.end
+
+if.end:
+  %vq.0 = phi <512 x i1> [ %0, %if.then ], [ undef, %entry ]
+  %cmp9 = icmp sgt i32 %count, 0
+  br i1 %cmp9, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  %vq.1.lcssa = phi <512 x i1> [ %vq.0, %if.end ], [ %1, %for.body ]
+  store <512 x i1> %vq.1.lcssa, <512 x i1>* %ptr, align 64
+  ret i32 0
+
+for.body:
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
+  %vq.110 = phi <512 x i1> [ %1, %for.body ], [ %vq.0, %if.end ]
+  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %vq.110, <16 x i8> %vc, <16 x i8> %vc)
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond.not = icmp eq i32 %inc, %count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

diff --git a/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir
new file mode 100644
index 000000000000..5ebd8980d65b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir
@@ -0,0 +1,825 @@
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 %s -o - \
+# RUN:   -run-pass=ppc-mi-peepholes -verify-machineinstrs | FileCheck %s
+
+# Test the peephole replacing unprimed accumulator PHI nodes by primed
+# accumulator PHI nodes. We have a test for the simple case (PHI nodes with COPY
+# operands), a test for PHI nodes with IMPLICIT_DEF operands, a test for PHI
+# nodes with operands being other PHI nodes on unprimed accumulators, and a test
+# with an unprimed accumulator PHI node cycle.
+
+--- |
+  define dso_local void @phiCopy(i32 signext %i, <16 x i8> %vc, <512 x i1>* nocapture %ptr) {
+  entry:
+    %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
+    %tobool.not = icmp eq i32 %i, 0
+    br i1 %tobool.not, label %if.end, label %if.then
+
+  if.then:
+    %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
+    br label %if.end
+
+  if.end:
+    %vq.0 = phi <512 x i1> [ %1, %if.then ], [ %0, %entry ]
+    store <512 x i1> %vq.0, <512 x i1>* %ptr, align 64
+    ret void
+  }
+
+  declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
+
+  declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
+
+  define dso_local void @phiCopyUndef(i32 signext %i, <16 x i8> %vc, <512 x i1>* nocapture %ptr) {
+  entry:
+    %tobool.not = icmp eq i32 %i, 0
+    br i1 %tobool.not, label %if.end, label %if.then
+
+  if.then:
+    %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> undef, <16 x i8> %vc, <16 x i8> %vc)
+    br label %if.end
+
+  if.end:
+    %vq.0 = phi <512 x i1> [ %0, %if.then ], [ undef, %entry ]
+    store <512 x i1> %vq.0, <512 x i1>* %ptr, align 64
+    ret void
+  }
+
+  define dso_local void @phiPhis(i32 signext %i, <16 x i8> %vc, <512 x i1>* nocapture %ptr) {
+  entry:
+    %cmp6 = icmp sgt i32 %i, 0
+    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:
+    %0 = add i32 %i, -1
+    %xtraiter = and i32 %i, 7
+    %1 = icmp ult i32 %0, 7
+    br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
+
+  for.body.preheader.new:
+    %unroll_iter = and i32 %i, -8
+    %2 = add i32 %unroll_iter, -8
+    %3 = zext i32 %2 to i64
+    %4 = lshr i64 %3, 3
+    %5 = add nuw nsw i64 %4, 1
+    call void @llvm.set.loop.iterations.i64(i64 %5)
+    br label %for.body
+
+  for.cond.cleanup.loopexit.unr-lcssa:
+    %vq.07.unr = phi <512 x i1> [ undef, %for.body.preheader ], [ %18, %for.body ]
+    %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+    br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil.preheader
+
+  for.body.epil.preheader:
+    %6 = add nsw i32 %xtraiter, -1
+    %7 = zext i32 %6 to i64
+    %8 = add nuw nsw i64 %7, 1
+    call void @llvm.set.loop.iterations.i64(i64 %8)
+    br label %for.body.epil
+
+  for.body.epil:
+    %vq.07.epil = phi <512 x i1> [ %9, %for.body.epil ], [ %vq.07.unr, %for.body.epil.preheader ]
+    %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %vq.07.epil, <16 x i8> %vc, <16 x i8> %vc)
+    %10 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %10, label %for.body.epil, label %for.cond.cleanup
+
+  for.cond.cleanup:
+    %vq.0.lcssa = phi <512 x i1> [ undef, %entry ], [ %vq.07.unr, %for.cond.cleanup.loopexit.unr-lcssa ], [ %9, %for.body.epil ]
+    %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %ptr, i64 1
+    store <512 x i1> %vq.0.lcssa, <512 x i1>* %add.ptr, align 64
+    ret void
+
+  for.body:
+    %vq.07 = phi <512 x i1> [ undef, %for.body.preheader.new ], [ %18, %for.body ]
+    %11 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %vq.07, <16 x i8> %vc, <16 x i8> %vc)
+    %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %11, <16 x i8> %vc, <16 x i8> %vc)
+    %13 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %12, <16 x i8> %vc, <16 x i8> %vc)
+    %14 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %13, <16 x i8> %vc, <16 x i8> %vc)
+    %15 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %14, <16 x i8> %vc, <16 x i8> %vc)
+    %16 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %15, <16 x i8> %vc, <16 x i8> %vc)
+    %17 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %16, <16 x i8> %vc, <16 x i8> %vc)
+    %18 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %17, <16 x i8> %vc, <16 x i8> %vc)
+    %19 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %19, label %for.body, label %for.cond.cleanup.loopexit.unr-lcssa
+  }
+
+  define dso_local void @phiCycle(i32 signext %i, <16 x i8> %vc, <512 x i1>* nocapture %ptr) {
+  entry:
+    %cmp6 = icmp sgt i32 %i, 0
+    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:
+    %0 = add i32 %i, -1
+    %xtraiter = and i32 %i, 7
+    %1 = icmp ult i32 %0, 7
+    br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new
+
+  for.body.preheader.new:
+    %unroll_iter = and i32 %i, -8
+    %2 = add i32 %unroll_iter, -8
+    %3 = zext i32 %2 to i64
+    %4 = lshr i64 %3, 3
+    %5 = add nuw nsw i64 %4, 1
+    call void @llvm.set.loop.iterations.i64(i64 %5)
+    br label %for.body
+
+  for.cond.cleanup.loopexit.unr-lcssa:
+    %vq.07.unr = phi <512 x i1> [ undef, %for.body.preheader ], [ %18, %for.body ], [ %vq.07.epil, %for.body.epil ]
+    %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+    br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil.preheader
+
+  for.body.epil.preheader:
+    %6 = add nsw i32 %xtraiter, -1
+    %7 = zext i32 %6 to i64
+    %8 = add nuw nsw i64 %7, 1
+    call void @llvm.set.loop.iterations.i64(i64 %8)
+    br label %for.body.epil
+
+  for.body.epil:
+    %vq.07.epil = phi <512 x i1> [ %9, %for.body.epil ], [ %vq.07.unr, %for.body.epil.preheader ]
+    %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %vq.07.epil, <16 x i8> %vc, <16 x i8> %vc)
+    %10 = call i1 @llvm.loop.decrement.i64(i64 1)
+    %test = icmp ult i32 %0, 7
+    br i1 %test, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.epil
+    ;br i1 %10, label %for.body.epil, label %for.cond.cleanup
+
+  for.cond.cleanup:
+    %vq.0.lcssa = phi <512 x i1> [ undef, %entry ], [ %vq.07.unr, %for.cond.cleanup.loopexit.unr-lcssa ]
+    %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %ptr, i64 1
+    store <512 x i1> %vq.0.lcssa, <512 x i1>* %add.ptr, align 64
+    ret void
+
+  for.body:
+    %vq.07 = phi <512 x i1> [ undef, %for.body.preheader.new ], [ %18, %for.body ]
+    %11 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %vq.07, <16 x i8> %vc, <16 x i8> %vc)
+    %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %11, <16 x i8> %vc, <16 x i8> %vc)
+    %13 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %12, <16 x i8> %vc, <16 x i8> %vc)
+    %14 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %13, <16 x i8> %vc, <16 x i8> %vc)
+    %15 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %14, <16 x i8> %vc, <16 x i8> %vc)
+    %16 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %15, <16 x i8> %vc, <16 x i8> %vc)
+    %17 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %16, <16 x i8> %vc, <16 x i8> %vc)
+    %18 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %17, <16 x i8> %vc, <16 x i8> %vc)
+    %19 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %19, label %for.body, label %for.cond.cleanup.loopexit.unr-lcssa
+  }
+
+  declare void @llvm.set.loop.iterations.i64(i64)
+
+  declare i1 @llvm.loop.decrement.i64(i64)
+
+...
+---
+name:            phiCopy
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: uaccrc, preferred-register: '' }
+  - { id: 1, class: uaccrc, preferred-register: '' }
+  - { id: 2, class: uaccrc, preferred-register: '' }
+  - { id: 3, class: g8rc, preferred-register: '' }
+  - { id: 4, class: vrrc, preferred-register: '' }
+  - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 6, class: gprc, preferred-register: '' }
+  - { id: 7, class: accrc, preferred-register: '' }
+  - { id: 8, class: crrc, preferred-register: '' }
+  - { id: 9, class: vsrc, preferred-register: '' }
+  - { id: 10, class: accrc, preferred-register: '' }
+  - { id: 11, class: accrc, preferred-register: '' }
+  - { id: 12, class: accrc, preferred-register: '' }
+  - { id: 13, class: accrc, preferred-register: '' }
+  - { id: 14, class: vsrc, preferred-register: '' }
+  - { id: 15, class: vsrprc, preferred-register: '' }
+  - { id: 16, class: vsrprc, preferred-register: '' }
+  - { id: 17, class: vsrc, preferred-register: '' }
+  - { id: 18, class: vsrprc, preferred-register: '' }
+  - { id: 19, class: vsrprc, preferred-register: '' }
+  - { id: 20, class: vsrc, preferred-register: '' }
+  - { id: 21, class: vsrprc, preferred-register: '' }
+  - { id: 22, class: vsrprc, preferred-register: '' }
+  - { id: 23, class: vsrc, preferred-register: '' }
+  - { id: 24, class: vsrprc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%3' }
+  - { reg: '$v2', virtual-reg: '%4' }
+  - { reg: '$x7', virtual-reg: '%5' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $x3, $v2, $x7
+
+    %5:g8rc_and_g8rc_nox0 = COPY $x7
+    %4:vrrc = COPY $v2
+    %3:g8rc = COPY $x3
+    %6:gprc = COPY %3.sub_32
+    %7:accrc = XXSETACCZ
+    %0:uaccrc = COPY %7
+    %8:crrc = CMPLWI killed %6, 0
+    BCC 76, killed %8, %bb.2
+    B %bb.1
+
+  bb.1.if.then:
+    successors: %bb.2(0x80000000)
+
+    %9:vsrc = COPY %4
+    %11:accrc = COPY %7
+    %10:accrc = XVF32GERPP %11, %9, %9
+    %1:uaccrc = COPY %10
+
+  bb.2.if.end:
+    ; We check that the primed accumulator PHI node is inserted at the
+    ; top of the block.
+    ; CHECK-LABEL: name: phiCopy
+    ; CHECK-LABEL: bb.{{[0-9]}}.if.end:
+    ; CHECK-NEXT: :accrc = PHI %7, %bb.0, %10, %bb.1
+    ; CHECK-NEXT: %2:uaccrc = PHI
+    %2:uaccrc = PHI %0, %bb.0, %1, %bb.1
+    %13:accrc = COPY %2
+    %12:accrc = XXMFACC %13
+    %14:vsrc = COPY %12.sub_vsx1
+    %16:vsrprc = IMPLICIT_DEF
+    %15:vsrprc = INSERT_SUBREG %16, killed %14, %subreg.sub_vsx1
+    %17:vsrc = COPY %12.sub_vsx0
+    %18:vsrprc = INSERT_SUBREG %15, killed %17, %subreg.sub_vsx0
+    STXVP killed %18, 32, %5 :: (store 32 into %ir.ptr + 32)
+    %19:vsrprc = COPY %12.sub_pair1
+    %20:vsrc = COPY %19.sub_vsx1
+    %22:vsrprc = IMPLICIT_DEF
+    %21:vsrprc = INSERT_SUBREG %22, killed %20, %subreg.sub_vsx1
+    %23:vsrc = COPY %19.sub_vsx0
+    %24:vsrprc = INSERT_SUBREG %21, killed %23, %subreg.sub_vsx0
+    STXVP killed %24, 0, %5 :: (store 32 into %ir.ptr, align 64)
+    BLR8 implicit $lr8, implicit $rm
+
+...
+---
+name:            phiCopyUndef
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: uaccrc, preferred-register: '' }
+  - { id: 1, class: uaccrc, preferred-register: '' }
+  - { id: 2, class: g8rc, preferred-register: '' }
+  - { id: 3, class: vrrc, preferred-register: '' }
+  - { id: 4, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 5, class: uaccrc, preferred-register: '' }
+  - { id: 6, class: gprc, preferred-register: '' }
+  - { id: 7, class: crrc, preferred-register: '' }
+  - { id: 8, class: vsrc, preferred-register: '' }
+  - { id: 9, class: accrc, preferred-register: '' }
+  - { id: 10, class: uaccrc, preferred-register: '' }
+  - { id: 11, class: accrc, preferred-register: '' }
+  - { id: 12, class: accrc, preferred-register: '' }
+  - { id: 13, class: accrc, preferred-register: '' }
+  - { id: 14, class: vsrc, preferred-register: '' }
+  - { id: 15, class: vsrprc, preferred-register: '' }
+  - { id: 16, class: vsrprc, preferred-register: '' }
+  - { id: 17, class: vsrc, preferred-register: '' }
+  - { id: 18, class: vsrprc, preferred-register: '' }
+  - { id: 19, class: vsrprc, preferred-register: '' }
+  - { id: 20, class: vsrc, preferred-register: '' }
+  - { id: 21, class: vsrprc, preferred-register: '' }
+  - { id: 22, class: vsrprc, preferred-register: '' }
+  - { id: 23, class: vsrc, preferred-register: '' }
+  - { id: 24, class: vsrprc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%2' }
+  - { reg: '$v2', virtual-reg: '%3' }
+  - { reg: '$x7', virtual-reg: '%4' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.3(0x30000000), %bb.1(0x50000000)
+    liveins: $x3, $v2, $x7
+
+    %4:g8rc_and_g8rc_nox0 = COPY $x7
+    %3:vrrc = COPY $v2
+    %2:g8rc = COPY $x3
+    %6:gprc = COPY %2.sub_32
+    %7:crrc = CMPLWI killed %6, 0
+    BCC 68, killed %7, %bb.1
+
+  bb.3:
+    successors: %bb.2(0x80000000)
+
+    %5:uaccrc = IMPLICIT_DEF
+    B %bb.2
+
+  bb.1.if.then:
+    successors: %bb.2(0x80000000)
+
+    %8:vsrc = COPY %3
+    %10:uaccrc = IMPLICIT_DEF
+    %11:accrc = COPY %10
+    %9:accrc = XVF32GERPP %11, %8, %8
+    %0:uaccrc = COPY %9
+
+  bb.2.if.end:
+    ; We check that the primed accumulator PHI node is inserted at the
+    ; top of the block.
+    ; CHECK-LABEL: name: phiCopyUndef
+    ; CHECK-LABEL: bb.{{[0-9]}}.if.end:
+    ; CHECK-NEXT: :accrc = PHI
+    ; CHECK-NEXT: %1:uaccrc = PHI
+    %1:uaccrc = PHI %5, %bb.3, %0, %bb.1
+    %13:accrc = COPY %1
+    %12:accrc = XXMFACC %13
+    %14:vsrc = COPY %12.sub_vsx1
+    %16:vsrprc = IMPLICIT_DEF
+    %15:vsrprc = INSERT_SUBREG %16, killed %14, %subreg.sub_vsx1
+    %17:vsrc = COPY %12.sub_vsx0
+    %18:vsrprc = INSERT_SUBREG %15, killed %17, %subreg.sub_vsx0
+    STXVP killed %18, 32, %4 :: (store 32 into %ir.ptr + 32)
+    %19:vsrprc = COPY %12.sub_pair1
+    %20:vsrc = COPY %19.sub_vsx1
+    %22:vsrprc = IMPLICIT_DEF
+    %21:vsrprc = INSERT_SUBREG %22, killed %20, %subreg.sub_vsx1
+    %23:vsrc = COPY %19.sub_vsx0
+    %24:vsrprc = INSERT_SUBREG %21, killed %23, %subreg.sub_vsx0
+    STXVP killed %24, 0, %4 :: (store 32 into %ir.ptr, align 64)
+    BLR8 implicit $lr8, implicit $rm
+
+...
+---
+name:            phiPhis
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 1, class: uaccrc, preferred-register: '' }
+  - { id: 2, class: uaccrc, preferred-register: '' }
+  - { id: 3, class: uaccrc, preferred-register: '' }
+  - { id: 4, class: uaccrc, preferred-register: '' }
+  - { id: 5, class: uaccrc, preferred-register: '' }
+  - { id: 6, class: uaccrc, preferred-register: '' }
+  - { id: 7, class: g8rc, preferred-register: '' }
+  - { id: 8, class: vrrc, preferred-register: '' }
+  - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 10, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 11, class: uaccrc, preferred-register: '' }
+  - { id: 12, class: crrc, preferred-register: '' }
+  - { id: 13, class: uaccrc, preferred-register: '' }
+  - { id: 14, class: gprc, preferred-register: '' }
+  - { id: 15, class: crrc, preferred-register: '' }
+  - { id: 16, class: uaccrc, preferred-register: '' }
+  - { id: 17, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 18, class: gprc, preferred-register: '' }
+  - { id: 19, class: g8rc, preferred-register: '' }
+  - { id: 20, class: g8rc, preferred-register: '' }
+  - { id: 21, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 22, class: g8rc, preferred-register: '' }
+  - { id: 23, class: vsrc, preferred-register: '' }
+  - { id: 24, class: accrc, preferred-register: '' }
+  - { id: 25, class: accrc, preferred-register: '' }
+  - { id: 26, class: accrc, preferred-register: '' }
+  - { id: 27, class: accrc, preferred-register: '' }
+  - { id: 28, class: accrc, preferred-register: '' }
+  - { id: 29, class: accrc, preferred-register: '' }
+  - { id: 30, class: accrc, preferred-register: '' }
+  - { id: 31, class: accrc, preferred-register: '' }
+  - { id: 32, class: accrc, preferred-register: '' }
+  - { id: 33, class: crrc, preferred-register: '' }
+  - { id: 34, class: gprc, preferred-register: '' }
+  - { id: 35, class: g8rc, preferred-register: '' }
+  - { id: 36, class: g8rc, preferred-register: '' }
+  - { id: 37, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 38, class: g8rc, preferred-register: '' }
+  - { id: 39, class: vsrc, preferred-register: '' }
+  - { id: 40, class: accrc, preferred-register: '' }
+  - { id: 41, class: accrc, preferred-register: '' }
+  - { id: 42, class: accrc, preferred-register: '' }
+  - { id: 43, class: accrc, preferred-register: '' }
+  - { id: 44, class: vsrc, preferred-register: '' }
+  - { id: 45, class: vsrprc, preferred-register: '' }
+  - { id: 46, class: vsrprc, preferred-register: '' }
+  - { id: 47, class: vsrc, preferred-register: '' }
+  - { id: 48, class: vsrprc, preferred-register: '' }
+  - { id: 49, class: vsrprc, preferred-register: '' }
+  - { id: 50, class: vsrc, preferred-register: '' }
+  - { id: 51, class: vsrprc, preferred-register: '' }
+  - { id: 52, class: vsrprc, preferred-register: '' }
+  - { id: 53, class: vsrc, preferred-register: '' }
+  - { id: 54, class: vsrprc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%7' }
+  - { reg: '$v2', virtual-reg: '%8' }
+  - { reg: '$x7', virtual-reg: '%9' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.8(0x30000000)
+    liveins: $x3, $v2, $x7
+
+    %9:g8rc_and_g8rc_nox0 = COPY $x7
+    %8:vrrc = COPY $v2
+    %7:g8rc = COPY $x3
+    %10:gprc_and_gprc_nor0 = COPY %7.sub_32
+    %12:crrc = CMPWI %10, 1
+    BCC 4, killed %12, %bb.1
+
+  bb.8:
+    successors: %bb.6(0x80000000)
+
+    %11:uaccrc = IMPLICIT_DEF
+    B %bb.6
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x40000000), %bb.2(0x40000000)
+
+    %14:gprc = ADDI %10, -1
+    %0:gprc_and_gprc_nor0 = RLWINM %10, 0, 29, 31
+    %13:uaccrc = IMPLICIT_DEF
+    %15:crrc = CMPLWI killed %14, 7
+    BCC 12, killed %15, %bb.3
+    B %bb.2
+
+  bb.2.for.body.preheader.new:
+    successors: %bb.7(0x80000000)
+
+    %17:gprc_and_gprc_nor0 = RLWINM %10, 0, 0, 28
+    %18:gprc = ADDI killed %17, -8
+    %20:g8rc = IMPLICIT_DEF
+    %19:g8rc = INSERT_SUBREG %20, killed %18, %subreg.sub_32
+    %21:g8rc_and_g8rc_nox0 = RLWINM8 %19, 29, 3, 31
+    %22:g8rc = nuw nsw ADDI8 killed %21, 1
+    MTCTR8loop killed %22, implicit-def dead $ctr8
+    %16:uaccrc = IMPLICIT_DEF
+    B %bb.7
+
+  bb.3.for.cond.cleanup.loopexit.unr-lcssa:
+    successors: %bb.6(0x30000000), %bb.4(0x50000000)
+
+    %1:uaccrc = PHI %13, %bb.1, %6, %bb.7
+    %33:crrc = CMPLWI %0, 0
+    BCC 76, killed %33, %bb.6
+    B %bb.4
+
+  bb.4.for.body.epil.preheader:
+    successors: %bb.5(0x80000000)
+
+    %34:gprc = nsw ADDI %0, -1
+    %36:g8rc = IMPLICIT_DEF
+    %35:g8rc = INSERT_SUBREG %36, killed %34, %subreg.sub_32
+    %37:g8rc_and_g8rc_nox0 = RLDICL killed %35, 0, 32
+    %38:g8rc = nuw nsw ADDI8 killed %37, 1
+    MTCTR8loop killed %38, implicit-def dead $ctr8
+
+  bb.5.for.body.epil:
+    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
+    ; We check that the primed accumulator PHI node is inserted at the
+    ; top of the block.
+    ; CHECK-LABEL: name: phiPhis
+    ; CHECK-LABEL: bb.{{[0-9]}}.for.body.epil:
+    ; CHECK-NEXT: successors: %bb.{{[0-9]}}(0x{{[0-9a-f]+}}), %bb.{{[0-9]}}(0x{{[0-9a-f]+}})
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: :accrc = PHI
+    ; CHECK-NEXT: %2:uaccrc = PHI
+    %2:uaccrc = PHI %1, %bb.4, %3, %bb.5
+    %39:vsrc = COPY %8
+    %41:accrc = COPY %2
+    %40:accrc = XVF32GERPP %41, %39, %39
+    %3:uaccrc = COPY %40
+    BDNZ8 %bb.5, implicit-def dead $ctr8, implicit $ctr8
+    B %bb.6
+
+  bb.6.for.cond.cleanup:
+    %4:uaccrc = PHI %11, %bb.8, %1, %bb.3, %3, %bb.5
+    %43:accrc = COPY %4
+    %42:accrc = XXMFACC %43
+    %44:vsrc = COPY %42.sub_vsx1
+    %46:vsrprc = IMPLICIT_DEF
+    %45:vsrprc = INSERT_SUBREG %46, killed %44, %subreg.sub_vsx1
+    %47:vsrc = COPY %42.sub_vsx0
+    %48:vsrprc = INSERT_SUBREG %45, killed %47, %subreg.sub_vsx0
+    STXVP killed %48, 96, %9 :: (store 32 into %ir.add.ptr + 32)
+    %49:vsrprc = COPY %42.sub_pair1
+    %50:vsrc = COPY %49.sub_vsx1
+    %52:vsrprc = IMPLICIT_DEF
+    %51:vsrprc = INSERT_SUBREG %52, killed %50, %subreg.sub_vsx1
+    %53:vsrc = COPY %49.sub_vsx0
+    %54:vsrprc = INSERT_SUBREG %51, killed %53, %subreg.sub_vsx0
+    STXVP killed %54, 64, %9 :: (store 32 into %ir.add.ptr, align 64)
+    BLR8 implicit $lr8, implicit $rm
+
+  bb.7.for.body:
+    successors: %bb.7(0x7c000000), %bb.3(0x04000000)
+
+    %5:uaccrc = PHI %16, %bb.2, %6, %bb.7
+    %23:vsrc = COPY %8
+    %25:accrc = COPY %5
+    %24:accrc = XVF32GERPP %25, %23, %23
+    %26:accrc = XVF32GERPP %24, %23, %23
+    %27:accrc = XVF32GERPP %26, %23, %23
+    %28:accrc = XVF32GERPP %27, %23, %23
+    %29:accrc = XVF32GERPP %28, %23, %23
+    %30:accrc = XVF32GERPP %29, %23, %23
+    %31:accrc = XVF32GERPP %30, %23, %23
+    %32:accrc = XVF32GERPP %31, %23, %23
+    %6:uaccrc = COPY %32
+    BDNZ8 %bb.7, implicit-def dead $ctr8, implicit $ctr8
+    B %bb.3
+
+...
+---
+name:            phiCycle
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 1, class: uaccrc, preferred-register: '' }
+  - { id: 2, class: uaccrc, preferred-register: '' }
+  - { id: 3, class: uaccrc, preferred-register: '' }
+  - { id: 4, class: uaccrc, preferred-register: '' }
+  - { id: 5, class: uaccrc, preferred-register: '' }
+  - { id: 6, class: uaccrc, preferred-register: '' }
+  - { id: 7, class: g8rc, preferred-register: '' }
+  - { id: 8, class: vrrc, preferred-register: '' }
+  - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 10, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 11, class: uaccrc, preferred-register: '' }
+  - { id: 12, class: crrc, preferred-register: '' }
+  - { id: 13, class: uaccrc, preferred-register: '' }
+  - { id: 14, class: gprc, preferred-register: '' }
+  - { id: 15, class: crrc, preferred-register: '' }
+  - { id: 16, class: uaccrc, preferred-register: '' }
+  - { id: 17, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 18, class: gprc, preferred-register: '' }
+  - { id: 19, class: g8rc, preferred-register: '' }
+  - { id: 20, class: g8rc, preferred-register: '' }
+  - { id: 21, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 22, class: g8rc, preferred-register: '' }
+  - { id: 23, class: vsrc, preferred-register: '' }
+  - { id: 24, class: accrc, preferred-register: '' }
+  - { id: 25, class: accrc, preferred-register: '' }
+  - { id: 26, class: accrc, preferred-register: '' }
+  - { id: 27, class: accrc, preferred-register: '' }
+  - { id: 28, class: accrc, preferred-register: '' }
+  - { id: 29, class: accrc, preferred-register: '' }
+  - { id: 30, class: accrc, preferred-register: '' }
+  - { id: 31, class: accrc, preferred-register: '' }
+  - { id: 32, class: accrc, preferred-register: '' }
+  - { id: 33, class: crrc, preferred-register: '' }
+  - { id: 34, class: gprc, preferred-register: '' }
+  - { id: 35, class: g8rc, preferred-register: '' }
+  - { id: 36, class: g8rc, preferred-register: '' }
+  - { id: 37, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 38, class: g8rc, preferred-register: '' }
+  - { id: 39, class: vsrc, preferred-register: '' }
+  - { id: 40, class: accrc, preferred-register: '' }
+  - { id: 41, class: accrc, preferred-register: '' }
+  - { id: 42, class: accrc, preferred-register: '' }
+  - { id: 43, class: accrc, preferred-register: '' }
+  - { id: 44, class: vsrc, preferred-register: '' }
+  - { id: 45, class: vsrprc, preferred-register: '' }
+  - { id: 46, class: vsrprc, preferred-register: '' }
+  - { id: 47, class: vsrc, preferred-register: '' }
+  - { id: 48, class: vsrprc, preferred-register: '' }
+  - { id: 49, class: vsrprc, preferred-register: '' }
+  - { id: 50, class: vsrc, preferred-register: '' }
+  - { id: 51, class: vsrprc, preferred-register: '' }
+  - { id: 52, class: vsrprc, preferred-register: '' }
+  - { id: 53, class: vsrc, preferred-register: '' }
+  - { id: 54, class: vsrprc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%7' }
+  - { reg: '$v2', virtual-reg: '%8' }
+  - { reg: '$x7', virtual-reg: '%9' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.8(0x30000000)
+    liveins: $x3, $v2, $x7
+
+    %9:g8rc_and_g8rc_nox0 = COPY $x7
+    %8:vrrc = COPY $v2
+    %7:g8rc = COPY $x3
+    %10:gprc_and_gprc_nor0 = COPY %7.sub_32
+    %12:crrc = CMPWI %10, 1
+    BCC 4, killed %12, %bb.1
+
+  bb.8:
+    successors: %bb.6(0x80000000)
+
+    %11:uaccrc = IMPLICIT_DEF
+    B %bb.6
+
+  bb.1.for.body.preheader:
+    successors: %bb.3(0x40000000), %bb.2(0x40000000)
+
+    %14:gprc = ADDI %10, -1
+    %0:gprc_and_gprc_nor0 = RLWINM %10, 0, 29, 31
+    %13:uaccrc = IMPLICIT_DEF
+    %15:crrc = CMPLWI %14, 7
+    BCC 12, killed %15, %bb.3
+    B %bb.2
+
+  bb.2.for.body.preheader.new:
+    successors: %bb.7(0x80000000)
+
+    %17:gprc_and_gprc_nor0 = RLWINM %10, 0, 0, 28
+    %18:gprc = ADDI killed %17, -8
+    %20:g8rc = IMPLICIT_DEF
+    %19:g8rc = INSERT_SUBREG %20, killed %18, %subreg.sub_32
+    %21:g8rc_and_g8rc_nox0 = RLWINM8 %19, 29, 3, 31
+    %22:g8rc = nuw nsw ADDI8 killed %21, 1
+    MTCTR8loop killed %22, implicit-def dead $ctr8
+    %16:uaccrc = IMPLICIT_DEF
+    B %bb.7
+
+  bb.3.for.cond.cleanup.loopexit.unr-lcssa:
+    successors: %bb.6(0x30000000), %bb.4(0x50000000)
+    ; We check that no primed accumulator PHI node is inserted in the block.
+    ; CHECK-LABEL: name: phiCycle
+    ; CHECK-LABEL: bb.{{[0-9]}}.for.cond.cleanup.loopexit.unr-lcssa:
+    ; CHECK-NEXT: successors: %bb.{{[0-9]}}(0x{{[0-9a-f]+}}), %bb.{{[0-9]}}(0x{{[0-9a-f]+}})
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: %1:uaccrc = PHI
+    ; CHECK-NEXT: %33:crrc
+    %1:uaccrc = PHI %13, %bb.1, %6, %bb.7, %2, %bb.5
+    %33:crrc = CMPLWI %0, 0
+    BCC 76, killed %33, %bb.6
+    B %bb.4
+
+  bb.4.for.body.epil.preheader:
+    successors: %bb.5(0x80000000)
+
+    %34:gprc = nsw ADDI %0, -1
+    %36:g8rc = IMPLICIT_DEF
+    %35:g8rc = INSERT_SUBREG %36, killed %34, %subreg.sub_32
+    %37:g8rc_and_g8rc_nox0 = RLDICL killed %35, 0, 32
+    %38:g8rc = nuw nsw ADDI8 killed %37, 1
+    MTCTR8loop killed %38, implicit-def dead $ctr8
+
+  bb.5.for.body.epil:
+    successors: %bb.3(0x40000000), %bb.5(0x7c000000)
+    ; We check that no primed accumulator PHI node is inserted in the block.
+    ; CHECK-LABEL: bb.{{[0-9]}}.for.body.epil:
+    ; CHECK-NEXT: successors: %bb.{{[0-9]}}(0x{{[0-9a-f]+}}), %bb.{{[0-9]}}(0x{{[0-9a-f]+}})
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: %2:uaccrc = PHI
+    ; CHECK-NEXT: %39:vsrc
+    %2:uaccrc = PHI %1, %bb.4, %3, %bb.5
+    %39:vsrc = COPY %8
+    %41:accrc = COPY %2
+    %40:accrc = XVF32GERPP %41, %39, %39
+    %3:uaccrc = COPY %40
+    %15:crrc = CMPLWI %14, 7
+    BCC 12, killed %15, %bb.5
+    B %bb.3
+
+  bb.6.for.cond.cleanup:
+    %4:uaccrc = PHI %11, %bb.8, %1, %bb.3
+    %43:accrc = COPY %4
+    %42:accrc = XXMFACC %43
+    %44:vsrc = COPY %42.sub_vsx1
+    %46:vsrprc = IMPLICIT_DEF
+    %45:vsrprc = INSERT_SUBREG %46, killed %44, %subreg.sub_vsx1
+    %47:vsrc = COPY %42.sub_vsx0
+    %48:vsrprc = INSERT_SUBREG %45, killed %47, %subreg.sub_vsx0
+    STXVP killed %48, 96, %9 :: (store 32 into %ir.add.ptr + 32)
+    %49:vsrprc = COPY %42.sub_pair1
+    %50:vsrc = COPY %49.sub_vsx1
+    %52:vsrprc = IMPLICIT_DEF
+    %51:vsrprc = INSERT_SUBREG %52, killed %50, %subreg.sub_vsx1
+    %53:vsrc = COPY %49.sub_vsx0
+    %54:vsrprc = INSERT_SUBREG %51, killed %53, %subreg.sub_vsx0
+    STXVP killed %54, 64, %9 :: (store 32 into %ir.add.ptr, align 64)
+    BLR8 implicit $lr8, implicit $rm
+
+  bb.7.for.body:
+    successors: %bb.7(0x7c000000), %bb.3(0x04000000)
+
+    %5:uaccrc = PHI %16, %bb.2, %6, %bb.7
+    %23:vsrc = COPY %8
+    %25:accrc = COPY %5
+    %24:accrc = XVF32GERPP %25, %23, %23
+    %26:accrc = XVF32GERPP %24, %23, %23
+    %27:accrc = XVF32GERPP %26, %23, %23
+    %28:accrc = XVF32GERPP %27, %23, %23
+    %29:accrc = XVF32GERPP %28, %23, %23
+    %30:accrc = XVF32GERPP %29, %23, %23
+    %31:accrc = XVF32GERPP %30, %23, %23
+    %32:accrc = XVF32GERPP %31, %23, %23
+    %6:uaccrc = COPY %32
+    BDNZ8 %bb.7, implicit-def dead $ctr8, implicit $ctr8
+    B %bb.3
+
+...
