[llvm] 1a6dc92 - [PowerPC] Inefficient register allocation of ACC registers results in many copies.

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 20 08:53:47 PDT 2021


Author: Stefan Pintilie
Date: 2021-07-20T10:53:40-05:00
New Revision: 1a6dc92be7d68611077f0fb0b723b361817c950c

URL: https://github.com/llvm/llvm-project/commit/1a6dc92be7d68611077f0fb0b723b361817c950c
DIFF: https://github.com/llvm/llvm-project/commit/1a6dc92be7d68611077f0fb0b723b361817c950c.diff

LOG: [PowerPC] Inefficient register allocation of ACC registers results in many copies.

ACC registers are a combination of four consecutive vector registers.
If the vector registers are assigned first this often forces a number
of copies to appear just before the ACC register is created. If the ACC
register is assigned first then fewer copies are generated when the vector
registers are assigned.

This patch tries to force the register allocator to assign the ACC registers first
and then the UACC registers and then the vector pair registers. It does this
by changing the priority of the register classes.

This patch also adds hints to help the register allocator assign UACC registers from
known ACC registers and vector pair registers from known UACC registers.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D105854

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetRegisterInfo.h
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.h
    llvm/lib/Target/PowerPC/PPCRegisterInfo.td
    llvm/test/CodeGen/PowerPC/mma-outer-product.ll
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
    llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 70017173a0de3..92ce5b737090c 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -871,6 +871,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
   /// (3) Bottom-up allocation is no longer guaranteed to optimally color.
   virtual bool reverseLocalAssignment() const { return false; }
 
+  /// Add the allocation priority to global and split ranges as well as the
+  /// local ranges when registers are added to the queue.
+  virtual bool addAllocPriorityToGlobalRanges() const { return false; }
+
   /// Allow the target to override the cost of using a callee-saved register for
   /// the first time. Default value of 0 means we will use a callee-saved
   /// register if it is available.

diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index b58b700a5c451..4eb12aa30ee9b 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -760,6 +760,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
     // Giant live ranges fall back to the global assignment heuristic, which
     // prevents excessive spilling in pathological cases.
     bool ReverseLocal = TRI->reverseLocalAssignment();
+    bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();
     const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
     bool ForceGlobal = !ReverseLocal &&
       (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
@@ -783,6 +784,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
       // don't fit should be spilled (or split) ASAP so they don't create
       // interference.  Mark a bit to prioritize global above local ranges.
       Prio = (1u << 29) + Size;
+
+      if (AddPriorityToGlobal)
+        Prio |= RC.AllocationPriority << 24;
     }
     // Mark a higher bit to prioritize global and local above RS_Split.
     Prio |= (1u << 31);

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index bc950a73beb6a..4f16c7f5ff175 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -472,6 +472,62 @@ bool PPCRegisterInfo::isCallerPreservedPhysReg(MCRegister PhysReg,
   return false;
 }
 
+bool PPCRegisterInfo::getRegAllocationHints(Register VirtReg,
+                                            ArrayRef<MCPhysReg> Order,
+                                            SmallVectorImpl<MCPhysReg> &Hints,
+                                            const MachineFunction &MF,
+                                            const VirtRegMap *VRM,
+                                            const LiveRegMatrix *Matrix) const {
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+  // Call the base implementation first to set any hints based on the usual
+  // heuristics and decide what the return value should be. We want to return
+  // the same value returned by the base implementation. If the base
+  // implementation decides to return true and force the allocation then we
+  // will leave it as such. On the other hand if the base implementation
+  // decides to return false the following code will not force the allocation
+  // as we are just looking to provide a hint.
+  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+      VirtReg, Order, Hints, MF, VRM, Matrix);
+  // We are interested in instructions that copy values to ACC/UACC.
+  // The copy into UACC will be simply a COPY to a subreg so we
+  // want to allocate the corresponding physical subreg for the source.
+  // The copy into ACC will be a BUILD_UACC so we want to allocate
+  // the same numbered UACC register for the source.
+  for (MachineInstr &Use : MRI->reg_nodbg_instructions(VirtReg)) {
+    const MachineOperand *ResultOp = nullptr;
+    Register ResultReg;
+    switch (Use.getOpcode()) {
+    case TargetOpcode::COPY: {
+      ResultOp = &Use.getOperand(0);
+      ResultReg = ResultOp->getReg();
+      if (Register::isVirtualRegister(ResultReg) &&
+          MRI->getRegClass(ResultReg)->contains(PPC::UACC0) &&
+          VRM->hasPhys(ResultReg)) {
+        Register UACCPhys = VRM->getPhys(ResultReg);
+        Register HintReg = getSubReg(UACCPhys, ResultOp->getSubReg());
+        Hints.push_back(HintReg);
+      }
+      break;
+    }
+    case PPC::BUILD_UACC: {
+      ResultOp = &Use.getOperand(0);
+      ResultReg = ResultOp->getReg();
+      if (MRI->getRegClass(ResultReg)->contains(PPC::ACC0) &&
+          VRM->hasPhys(ResultReg)) {
+        Register ACCPhys = VRM->getPhys(ResultReg);
+        assert((ACCPhys >= PPC::ACC0 && ACCPhys <= PPC::ACC7) &&
+               "Expecting an ACC register for BUILD_UACC.");
+        Register HintReg = PPC::UACC0 + (ACCPhys - PPC::ACC0);
+        Hints.push_back(HintReg);
+      }
+      break;
+    }
+    }
+  }
+  return BaseImplRetVal;
+}
+
 unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                               MachineFunction &MF) const {
   const PPCFrameLowering *TFI = getFrameLowering(MF);

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 27fc01c163da7..c22a5826337bd 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -94,6 +94,16 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   bool isCallerPreservedPhysReg(MCRegister PhysReg,
                                 const MachineFunction &MF) const override;
 
+  // Provide hints to the register allocator for allocating subregisters
+  // of primed and unprimed accumulators. For example, if accumulator
+  // ACC5 is assigned, we also want to assign UACC5 to the input.
+  // Similarly if UACC5 is assigned, we want to assign VSRp10, VSRp11
+  // to its inputs.
+  bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
+                             SmallVectorImpl<MCPhysReg> &Hints,
+                             const MachineFunction &MF, const VirtRegMap *VRM,
+                             const LiveRegMatrix *Matrix) const override;
+
   /// We require the register scavenger.
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
@@ -137,6 +147,8 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
+  bool addAllocPriorityToGlobalRanges() const override { return true; }
+
   // Support for virtual base registers.
   bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
   Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index e1d1c52aa53ae..044035e0ef29e 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -460,6 +460,13 @@ let SubRegIndices = [sub_pair0, sub_pair1] in {
 }
 def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
                                                       ACC4, ACC5, ACC6, ACC7)> {
+  // The AllocationPriority is in the range [0, 63]. Assign the ACC registers
+  // the highest possible priority in this range to force the register allocator
+  // to assign these registers first. This is done because the ACC registers
+  // must represent 4 adjacent vector registers. For example ACC1 must be
+  // VS4 - VS7. The value here must be at least 32 as we want to allocate
+  // these registers even before we allocate global ranges.
+  let AllocationPriority = 63;
   let Size = 512;
 }
 
@@ -476,6 +483,11 @@ let SubRegIndices = [sub_pair0, sub_pair1] in {
 def UACCRC : RegisterClass<"PPC", [v512i1], 128,
                            (add UACC0, UACC1, UACC2, UACC3,
                                 UACC4, UACC5, UACC6, UACC7)> {
+  // The AllocationPriority for the UACC registers is still high and must be at
+  // least 32 as we want to allocate these registers before we allocate other
+  // global ranges. The value must be less than the AllocationPriority of the
+  // ACC registers.
+  let AllocationPriority = 36;
   let Size = 512;
 }
 
@@ -493,6 +505,12 @@ def VSRpRC :
                      VSRp29, VSRp28, VSRp27, VSRp26,
                      (sequence "VSRp%u", 0, 6),
                      (sequence "VSRp%u", 15, 7))> {
+  // Give the VSRp registers a non-zero AllocationPriority. The value is less
+  // than 32 as these registers should not always be allocated before global
+  // ranges, and the value should be less than (AllocationPriority - 32) of
+  // the UACC registers. Even global VSRp registers should be allocated after
+  // the UACC registers have been chosen.
+  let AllocationPriority = 2;
   let Size = 256;
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index a6e1756317091..a2eeceb099773 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
-; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
 
 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
@@ -11,20 +11,19 @@ declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
 define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) {
 ; CHECK-LABEL: intrinsics1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
 ; CHECK-NEXT:    vmr v1, v4
 ; CHECK-NEXT:    vmr v4, v3
 ; CHECK-NEXT:    ld r3, 96(r1)
 ; CHECK-NEXT:    vmr v0, v2
+; CHECK-NEXT:    xxlor vs3, v5, v5
 ; CHECK-NEXT:    xxlor vs0, v0, v0
 ; CHECK-NEXT:    xxlor vs1, v1, v1
 ; CHECK-NEXT:    xxlor vs2, v4, v4
-; CHECK-NEXT:    xxlor vs3, v5, v5
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xvi4ger8pp acc0, v2, v3
 ; CHECK-NEXT:    xvf16ger2pp acc0, v2, v1
 ; CHECK-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
-; CHECK-NEXT:    vmr v3, v0
+; CHECK-NEXT:    vmr v3, v2
 ; CHECK-NEXT:    vmr v2, v5
 ; CHECK-NEXT:    pmxvf64gernp acc0, vsp34, v0, 0, 0
 ; CHECK-NEXT:    xxmfacc acc0
@@ -36,20 +35,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ;
 ; CHECK-BE-LABEL: intrinsics1:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
 ; CHECK-BE-NEXT:    vmr v1, v4
 ; CHECK-BE-NEXT:    vmr v4, v3
 ; CHECK-BE-NEXT:    ld r3, 112(r1)
 ; CHECK-BE-NEXT:    vmr v0, v2
+; CHECK-BE-NEXT:    xxlor vs3, v5, v5
 ; CHECK-BE-NEXT:    xxlor vs0, v0, v0
 ; CHECK-BE-NEXT:    xxlor vs1, v1, v1
 ; CHECK-BE-NEXT:    xxlor vs2, v4, v4
-; CHECK-BE-NEXT:    xxlor vs3, v5, v5
 ; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v3
 ; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v1
 ; CHECK-BE-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
-; CHECK-BE-NEXT:    vmr v3, v0
+; CHECK-BE-NEXT:    vmr v3, v2
 ; CHECK-BE-NEXT:    vmr v2, v5
 ; CHECK-BE-NEXT:    pmxvf64gernp acc0, vsp34, v0, 0, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -78,10 +76,10 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
 ; CHECK-NEXT:    lxv v4, 0(r5)
 ; CHECK-NEXT:    lxv v5, 0(r6)
 ; CHECK-NEXT:    xxlor vs0, v2, v2
-; CHECK-NEXT:    vmr v1, v2
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    xxlor vs2, v4, v4
 ; CHECK-NEXT:    xxlor vs3, v5, v5
+; CHECK-NEXT:    vmr v1, v2
 ; CHECK-NEXT:    vmr v0, v5
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xvi8ger4pp acc0, v2, v3
@@ -102,10 +100,10 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
 ; CHECK-BE-NEXT:    lxv v4, 0(r5)
 ; CHECK-BE-NEXT:    lxv v5, 0(r6)
 ; CHECK-BE-NEXT:    xxlor vs0, v2, v2
-; CHECK-BE-NEXT:    vmr v1, v2
 ; CHECK-BE-NEXT:    xxlor vs1, v3, v3
 ; CHECK-BE-NEXT:    xxlor vs2, v4, v4
 ; CHECK-BE-NEXT:    xxlor vs3, v5, v5
+; CHECK-BE-NEXT:    vmr v1, v2
 ; CHECK-BE-NEXT:    vmr v0, v5
 ; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    xvi8ger4pp acc0, v2, v3

diff  --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 258355d7e7b43..6ab9642f92fbd 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -123,7 +123,7 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    sldi 31, 3, 1
 ; CHECK-NEXT:    std 8, 32(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 9, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 41, 0(8)
+; CHECK-NEXT:    lxv 43, 0(8)
 ; CHECK-NEXT:    mr 8, 6
 ; CHECK-NEXT:    sldi 6, 3, 3
 ; CHECK-NEXT:    std 2, 144(1) # 8-byte Folded Spill
@@ -134,21 +134,21 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    add 6, 6, 23
 ; CHECK-NEXT:    lxv 7, 0(28)
 ; CHECK-NEXT:    add 28, 3, 31
-; CHECK-NEXT:    lxv 40, 0(9)
-; CHECK-NEXT:    lxv 39, 0(10)
-; CHECK-NEXT:    lxv 38, 0(15)
-; CHECK-NEXT:    lxv 33, 0(14)
-; CHECK-NEXT:    lxv 32, 0(16)
-; CHECK-NEXT:    lxv 37, 0(17)
-; CHECK-NEXT:    lxv 35, 0(18)
+; CHECK-NEXT:    lxv 42, 0(9)
+; CHECK-NEXT:    lxv 41, 0(10)
+; CHECK-NEXT:    lxv 40, 0(15)
+; CHECK-NEXT:    lxv 39, 0(14)
+; CHECK-NEXT:    lxv 38, 0(16)
+; CHECK-NEXT:    lxv 33, 0(17)
+; CHECK-NEXT:    lxv 37, 0(18)
 ; CHECK-NEXT:    lxv 13, 0(19)
 ; CHECK-NEXT:    lxv 10, 0(20)
 ; CHECK-NEXT:    lxv 8, 0(21)
 ; CHECK-NEXT:    lxv 6, 0(22)
 ; CHECK-NEXT:    lxv 4, 0(30)
 ; CHECK-NEXT:    lxv 1, 0(12)
-; CHECK-NEXT:    lxv 36, 0(24)
-; CHECK-NEXT:    lxv 34, 0(25)
+; CHECK-NEXT:    lxv 32, 0(24)
+; CHECK-NEXT:    lxv 36, 0(25)
 ; CHECK-NEXT:    lxv 12, 0(26)
 ; CHECK-NEXT:    lxv 9, 0(27)
 ; CHECK-NEXT:    lxv 5, 0(29)
@@ -216,7 +216,7 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lxvp 42, 0(6)
+; CHECK-NEXT:    lxvp 34, 0(6)
 ; CHECK-NEXT:    lxvp 44, 0(16)
 ; CHECK-NEXT:    lxvp 46, 0(17)
 ; CHECK-NEXT:    lxvp 48, 0(18)
@@ -233,19 +233,19 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    addi 17, 17, 64
 ; CHECK-NEXT:    addi 18, 18, 64
 ; CHECK-NEXT:    addi 19, 19, 64
-; CHECK-NEXT:    xvmaddadp 41, 45, 43
-; CHECK-NEXT:    xvmaddadp 40, 47, 43
-; CHECK-NEXT:    xvmaddadp 39, 49, 43
-; CHECK-NEXT:    xvmaddadp 38, 51, 43
-; CHECK-NEXT:    xvmaddadp 33, 63, 43
-; CHECK-NEXT:    xvmaddadp 32, 61, 43
-; CHECK-NEXT:    xvmaddadp 37, 44, 42
-; CHECK-NEXT:    xvmaddadp 35, 46, 42
-; CHECK-NEXT:    xvmaddadp 13, 48, 42
-; CHECK-NEXT:    xvmaddadp 11, 50, 42
-; CHECK-NEXT:    xvmaddadp 10, 62, 42
-; CHECK-NEXT:    xvmaddadp 8, 60, 42
-; CHECK-NEXT:    lxvp 42, 32(20)
+; CHECK-NEXT:    xvmaddadp 43, 45, 35
+; CHECK-NEXT:    xvmaddadp 42, 47, 35
+; CHECK-NEXT:    xvmaddadp 41, 49, 35
+; CHECK-NEXT:    xvmaddadp 40, 51, 35
+; CHECK-NEXT:    xvmaddadp 39, 63, 35
+; CHECK-NEXT:    xvmaddadp 38, 61, 35
+; CHECK-NEXT:    xvmaddadp 33, 44, 34
+; CHECK-NEXT:    xvmaddadp 37, 46, 34
+; CHECK-NEXT:    xvmaddadp 13, 48, 34
+; CHECK-NEXT:    xvmaddadp 11, 50, 34
+; CHECK-NEXT:    xvmaddadp 10, 62, 34
+; CHECK-NEXT:    xvmaddadp 8, 60, 34
+; CHECK-NEXT:    lxvp 34, 32(20)
 ; CHECK-NEXT:    lxvp 44, 32(21)
 ; CHECK-NEXT:    addi 20, 20, 64
 ; CHECK-NEXT:    addi 21, 21, 64
@@ -253,13 +253,13 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    xvmaddadp 4, 55, 59
 ; CHECK-NEXT:    xvmaddadp 3, 53, 59
 ; CHECK-NEXT:    xvmaddadp 2, 31, 59
-; CHECK-NEXT:    xvmaddadp 36, 56, 58
-; CHECK-NEXT:    xvmaddadp 34, 54, 58
+; CHECK-NEXT:    xvmaddadp 32, 56, 58
+; CHECK-NEXT:    xvmaddadp 36, 54, 58
 ; CHECK-NEXT:    xvmaddadp 12, 52, 58
 ; CHECK-NEXT:    xvmaddadp 9, 30, 58
-; CHECK-NEXT:    xvmaddadp 1, 43, 59
+; CHECK-NEXT:    xvmaddadp 1, 35, 59
 ; CHECK-NEXT:    xvmaddadp 0, 45, 59
-; CHECK-NEXT:    xvmaddadp 7, 42, 58
+; CHECK-NEXT:    xvmaddadp 7, 34, 58
 ; CHECK-NEXT:    xvmaddadp 5, 44, 58
 ; CHECK-NEXT:    bdnz .LBB0_4
 ; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
@@ -276,21 +276,21 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    ble 0, .LBB0_3
 ; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
 ; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 41, 0(3)
+; CHECK-NEXT:    stxv 43, 0(3)
 ; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 40, 0(3)
+; CHECK-NEXT:    stxv 42, 0(3)
 ; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 39, 0(3)
+; CHECK-NEXT:    stxv 41, 0(3)
 ; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 38, 0(3)
+; CHECK-NEXT:    stxv 40, 0(3)
 ; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 33, 0(3)
+; CHECK-NEXT:    stxv 39, 0(3)
 ; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 32, 0(3)
+; CHECK-NEXT:    stxv 38, 0(3)
 ; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 37, 0(3)
+; CHECK-NEXT:    stxv 33, 0(3)
 ; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 35, 0(3)
+; CHECK-NEXT:    stxv 37, 0(3)
 ; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 13, 0(3)
 ; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
@@ -312,9 +312,9 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 0, 0(3)
 ; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 36, 0(3)
+; CHECK-NEXT:    stxv 32, 0(3)
 ; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 34, 0(3)
+; CHECK-NEXT:    stxv 36, 0(3)
 ; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 12, 0(3)
 ; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
index ace652d503ae9..d4942d6ecd0b1 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
@@ -13,213 +13,194 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; CHECK-LABEL: acc_regalloc:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    lwz r3, 0(r3)
-; CHECK-NEXT:    lxv vs0, 0(0)
-; CHECK-NEXT:    xxlxor vs2, vs2, vs2
-; CHECK-NEXT:    xxlxor vs3, vs3, vs3
+; CHECK-NEXT:    lxv v4, 0(0)
+; CHECK-NEXT:    xxlxor v0, v0, v0
+; CHECK-NEXT:    xxlxor v1, v1, v1
 ; CHECK-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    xxlxor v2, v2, v2
 ; CHECK-NEXT:    li r6, 1
 ; CHECK-NEXT:    li r4, 16
-; CHECK-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    extswsli r3, r3, 3
-; CHECK-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f19, -104(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    xvmaddadp vs3, vs0, vs3
-; CHECK-NEXT:    lxvdsx vs1, 0, r3
-; CHECK-NEXT:    xvmaddadp vs2, vs1, vs2
+; CHECK-NEXT:    xvmaddadp v1, v4, v1
+; CHECK-NEXT:    lxvdsx v5, 0, r3
+; CHECK-NEXT:    xvmaddadp v0, v5, v0
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb9
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi r6, r6, 2
-; CHECK-NEXT:    lxv vs5, -64(r5)
-; CHECK-NEXT:    lxv vs6, -16(r5)
-; CHECK-NEXT:    lxv vs4, 16(0)
-; CHECK-NEXT:    xxlor v7, vs2, vs2
-; CHECK-NEXT:    xxlxor v8, v8, v8
-; CHECK-NEXT:    xxlxor v1, v1, v1
+; CHECK-NEXT:    lxv vs1, -64(r5)
+; CHECK-NEXT:    lxv vs2, -16(r5)
+; CHECK-NEXT:    lxv vs0, 16(0)
+; CHECK-NEXT:    vmr v9, v0
+; CHECK-NEXT:    xxlxor v10, v10, v10
+; CHECK-NEXT:    xxlxor v7, v7, v7
 ; CHECK-NEXT:    mulld r6, r6, r3
-; CHECK-NEXT:    xvmaddadp v7, vs5, v2
-; CHECK-NEXT:    xxlxor v6, v6, v6
-; CHECK-NEXT:    xvmaddadp v8, vs6, v8
-; CHECK-NEXT:    xvmaddadp v1, vs4, vs1
-; CHECK-NEXT:    xvmuldp v0, vs4, v2
-; CHECK-NEXT:    xvmaddadp v1, v2, v2
-; CHECK-NEXT:    xvmaddadp v0, v2, v2
-; CHECK-NEXT:    lxvdsx v4, r6, r4
-; CHECK-NEXT:    xvmaddadp v6, vs5, v6
+; CHECK-NEXT:    xvmaddadp v9, vs1, v2
+; CHECK-NEXT:    xxlxor v8, v8, v8
+; CHECK-NEXT:    xvmaddadp v10, vs2, v10
+; CHECK-NEXT:    xvmaddadp v7, vs0, v5
+; CHECK-NEXT:    xvmuldp v6, vs0, v2
+; CHECK-NEXT:    xvmaddadp v7, v2, v2
+; CHECK-NEXT:    xvmaddadp v6, v2, v2
+; CHECK-NEXT:    lxvdsx v14, r6, r4
+; CHECK-NEXT:    xvmaddadp v8, vs1, v8
 ; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    xvmuldp v9, vs6, v4
-; CHECK-NEXT:    xvmuldp v3, vs5, v4
-; CHECK-NEXT:    xvmuldp v11, vs0, v4
-; CHECK-NEXT:    vmr v10, v2
-; CHECK-NEXT:    xvmuldp v5, v4, v2
-; CHECK-NEXT:    vmr v4, v2
-; CHECK-NEXT:    xxlor vs18, v8, v8
+; CHECK-NEXT:    xvmuldp v11, vs2, v14
+; CHECK-NEXT:    xvmuldp v3, vs1, v14
+; CHECK-NEXT:    xvmuldp vs5, v14, v2
+; CHECK-NEXT:    xvmuldp v13, v4, v14
+; CHECK-NEXT:    vmr v12, v2
+; CHECK-NEXT:    xxlor vs14, v10, v10
+; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    xxlor vs4, v2, v2
-; CHECK-NEXT:    xxlor vs12, v10, v10
-; CHECK-NEXT:    xxlor vs13, v11, v11
-; CHECK-NEXT:    xxlor v10, vs3, vs3
-; CHECK-NEXT:    xxlor vs8, v4, v4
-; CHECK-NEXT:    xxlor vs9, v5, v5
-; CHECK-NEXT:    xxlor vs10, v0, v0
-; CHECK-NEXT:    xxlor vs11, v1, v1
-; CHECK-NEXT:    xxmtacc acc2
-; CHECK-NEXT:    xxlor vs19, v9, v9
-; CHECK-NEXT:    vmr v8, v2
-; CHECK-NEXT:    xxlor vs5, v3, v3
+; CHECK-NEXT:    # kill: def $vsrp2 killed $vsrp2 def $uacc1
 ; CHECK-NEXT:    xxlor vs6, v6, v6
 ; CHECK-NEXT:    xxlor vs7, v7, v7
-; CHECK-NEXT:    xxlor vs14, v10, v10
+; CHECK-NEXT:    xxlor vs8, v12, v12
+; CHECK-NEXT:    xxlor vs9, v13, v13
+; CHECK-NEXT:    vmr v12, v1
 ; CHECK-NEXT:    xxlor vs15, v11, v11
-; CHECK-NEXT:    xxlor vs16, v8, v8
-; CHECK-NEXT:    xxlor vs17, v9, v9
+; CHECK-NEXT:    vmr v10, v2
+; CHECK-NEXT:    xxlor vs1, v3, v3
+; CHECK-NEXT:    xxlor vs2, v8, v8
+; CHECK-NEXT:    xxlor vs3, v9, v9
+; CHECK-NEXT:    xxlor vs10, v12, v12
+; CHECK-NEXT:    xxlor vs11, v13, v13
 ; CHECK-NEXT:    xxmtacc acc1
+; CHECK-NEXT:    xxlor vs12, v10, v10
+; CHECK-NEXT:    xxlor vs13, v11, v11
+; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    xxmtacc acc2
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xxmtacc acc3
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xxmtacc acc4
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; CHECK-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    xxmfacc acc1
 ; CHECK-NEXT:    xxmfacc acc2
 ; CHECK-NEXT:    xxmfacc acc3
-; CHECK-NEXT:    xxmfacc acc4
-; CHECK-NEXT:    stxv vs5, 0(r3)
-; CHECK-NEXT:    stxv vs13, 32(r3)
-; CHECK-NEXT:    stxv vs8, 16(0)
-; CHECK-NEXT:    stxv vs16, 48(0)
+; CHECK-NEXT:    stxv vs1, 0(r3)
+; CHECK-NEXT:    stxv vs9, 32(r3)
+; CHECK-NEXT:    stxv vs4, 16(0)
+; CHECK-NEXT:    stxv vs12, 48(0)
 ; CHECK-NEXT:    b .LBB0_1
 ;
 ; TRACKLIVE-LABEL: acc_regalloc:
 ; TRACKLIVE:       # %bb.0: # %bb
 ; TRACKLIVE-NEXT:    lwz r3, 0(r3)
-; TRACKLIVE-NEXT:    lxv vs0, 0(0)
-; TRACKLIVE-NEXT:    xxlxor vs2, vs2, vs2
-; TRACKLIVE-NEXT:    xxlxor vs3, vs3, vs3
+; TRACKLIVE-NEXT:    lxv v4, 0(0)
+; TRACKLIVE-NEXT:    xxlxor v0, v0, v0
+; TRACKLIVE-NEXT:    xxlxor v1, v1, v1
 ; TRACKLIVE-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
 ; TRACKLIVE-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; TRACKLIVE-NEXT:    xxlxor v2, v2, v2
 ; TRACKLIVE-NEXT:    li r6, 1
 ; TRACKLIVE-NEXT:    li r4, 16
-; TRACKLIVE-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
-; TRACKLIVE-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
 ; TRACKLIVE-NEXT:    extswsli r3, r3, 3
-; TRACKLIVE-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
-; TRACKLIVE-NEXT:    stfd f19, -104(r1) # 8-byte Folded Spill
-; TRACKLIVE-NEXT:    xvmaddadp vs3, vs0, vs3
-; TRACKLIVE-NEXT:    lxvdsx vs1, 0, r3
-; TRACKLIVE-NEXT:    xvmaddadp vs2, vs1, vs2
+; TRACKLIVE-NEXT:    xvmaddadp v1, v4, v1
+; TRACKLIVE-NEXT:    lxvdsx v5, 0, r3
+; TRACKLIVE-NEXT:    xvmaddadp v0, v5, v0
 ; TRACKLIVE-NEXT:    .p2align 4
 ; TRACKLIVE-NEXT:  .LBB0_1: # %bb9
 ; TRACKLIVE-NEXT:    #
 ; TRACKLIVE-NEXT:    addi r6, r6, 2
-; TRACKLIVE-NEXT:    lxv vs4, 16(0)
-; TRACKLIVE-NEXT:    xxlxor v1, v1, v1
-; TRACKLIVE-NEXT:    lxv vs6, -16(r5)
-; TRACKLIVE-NEXT:    lxv vs5, -64(r5)
-; TRACKLIVE-NEXT:    xxlxor v8, v8, v8
-; TRACKLIVE-NEXT:    xxlor v7, vs2, vs2
-; TRACKLIVE-NEXT:    xxlxor v6, v6, v6
+; TRACKLIVE-NEXT:    lxv vs0, 16(0)
+; TRACKLIVE-NEXT:    xxlxor vs7, vs7, vs7
+; TRACKLIVE-NEXT:    lxv vs1, -64(r5)
+; TRACKLIVE-NEXT:    lxv vs4, -16(r5)
+; TRACKLIVE-NEXT:    xxlxor vs12, vs12, vs12
+; TRACKLIVE-NEXT:    xxlor vs3, v0, v0
+; TRACKLIVE-NEXT:    xxlxor vs2, vs2, vs2
 ; TRACKLIVE-NEXT:    mulld r6, r6, r3
-; TRACKLIVE-NEXT:    vmr v10, v2
-; TRACKLIVE-NEXT:    xxlor vs8, v10, v10
-; TRACKLIVE-NEXT:    xvmaddadp v1, vs4, vs1
-; TRACKLIVE-NEXT:    xvmuldp v0, vs4, v2
-; TRACKLIVE-NEXT:    xvmaddadp v8, vs6, v8
-; TRACKLIVE-NEXT:    xvmaddadp v7, vs5, v2
-; TRACKLIVE-NEXT:    xvmaddadp v6, vs5, v6
-; TRACKLIVE-NEXT:    xxlor vs4, v2, v2
-; TRACKLIVE-NEXT:    lxvdsx v4, r6, r4
+; TRACKLIVE-NEXT:    xxlor vs10, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs8, vs10, vs10
+; TRACKLIVE-NEXT:    xxlor vs10, v1, v1
+; TRACKLIVE-NEXT:    xvmaddadp vs7, vs0, v5
+; TRACKLIVE-NEXT:    xvmuldp vs6, vs0, v2
+; TRACKLIVE-NEXT:    xvmaddadp vs12, vs4, vs12
+; TRACKLIVE-NEXT:    xvmaddadp vs3, vs1, v2
+; TRACKLIVE-NEXT:    xvmaddadp vs2, vs1, vs2
+; TRACKLIVE-NEXT:    xxlor vs0, v2, v2
+; TRACKLIVE-NEXT:    lxvdsx v6, r6, r4
 ; TRACKLIVE-NEXT:    li r6, 0
-; TRACKLIVE-NEXT:    xvmaddadp v1, v2, v2
-; TRACKLIVE-NEXT:    xvmaddadp v0, v2, v2
-; TRACKLIVE-NEXT:    xxlor vs18, v8, v8
-; TRACKLIVE-NEXT:    vmr v8, v2
-; TRACKLIVE-NEXT:    xxlor vs7, v7, v7
-; TRACKLIVE-NEXT:    xxlor vs16, v8, v8
-; TRACKLIVE-NEXT:    xvmuldp v3, vs5, v4
-; TRACKLIVE-NEXT:    xvmuldp v5, vs0, v4
-; TRACKLIVE-NEXT:    xvmuldp v9, vs6, v4
-; TRACKLIVE-NEXT:    xvmuldp v11, v4, v2
-; TRACKLIVE-NEXT:    vmr v4, v2
-; TRACKLIVE-NEXT:    xxlor vs6, v6, v6
-; TRACKLIVE-NEXT:    xxlor vs12, v4, v4
-; TRACKLIVE-NEXT:    xxlor v4, vs3, vs3
-; TRACKLIVE-NEXT:    xxlor vs10, v0, v0
-; TRACKLIVE-NEXT:    xxlor vs11, v1, v1
-; TRACKLIVE-NEXT:    xxlor vs14, v4, v4
-; TRACKLIVE-NEXT:    xxlor vs5, v3, v3
-; TRACKLIVE-NEXT:    xxlor vs9, v11, v11
-; TRACKLIVE-NEXT:    xxlor vs13, v5, v5
-; TRACKLIVE-NEXT:    xxlor vs15, v5, v5
-; TRACKLIVE-NEXT:    xxlor vs19, v9, v9
-; TRACKLIVE-NEXT:    xxlor vs17, v9, v9
+; TRACKLIVE-NEXT:    xvmaddadp vs7, v2, v2
+; TRACKLIVE-NEXT:    xvmaddadp vs6, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs14, vs12, vs12
+; TRACKLIVE-NEXT:    xxlor vs12, v2, v2
+; TRACKLIVE-NEXT:    xvmuldp v3, vs1, v6
+; TRACKLIVE-NEXT:    xvmuldp vs11, v4, v6
+; TRACKLIVE-NEXT:    xvmuldp vs13, vs4, v6
+; TRACKLIVE-NEXT:    xvmuldp vs5, v6, v2
+; TRACKLIVE-NEXT:    xxlor vs4, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs1, v3, v3
+; TRACKLIVE-NEXT:    xxlor vs9, vs11, vs11
+; TRACKLIVE-NEXT:    xxlor vs15, vs13, vs13
 ; TRACKLIVE-NEXT:    xxmtacc acc1
+; TRACKLIVE-NEXT:    xxmtacc acc0
 ; TRACKLIVE-NEXT:    xxmtacc acc2
 ; TRACKLIVE-NEXT:    xxmtacc acc3
-; TRACKLIVE-NEXT:    xxmtacc acc4
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
-; TRACKLIVE-NEXT:    xvf64gerpp acc4, vsp34, vs0
+; TRACKLIVE-NEXT:    xxmfacc acc0
 ; TRACKLIVE-NEXT:    xxmfacc acc1
 ; TRACKLIVE-NEXT:    xxmfacc acc2
 ; TRACKLIVE-NEXT:    xxmfacc acc3
-; TRACKLIVE-NEXT:    xxmfacc acc4
-; TRACKLIVE-NEXT:    stxv vs5, 0(r3)
-; TRACKLIVE-NEXT:    stxv vs13, 32(r3)
-; TRACKLIVE-NEXT:    stxv vs8, 16(0)
-; TRACKLIVE-NEXT:    stxv vs16, 48(0)
+; TRACKLIVE-NEXT:    stxv vs1, 0(r3)
+; TRACKLIVE-NEXT:    stxv vs9, 32(r3)
+; TRACKLIVE-NEXT:    stxv vs4, 16(0)
+; TRACKLIVE-NEXT:    stxv vs12, 48(0)
 ; TRACKLIVE-NEXT:    b .LBB0_1
 bb:
   %i = load i32, i32* %arg, align 4


        


More information about the llvm-commits mailing list