[llvm] 449f2f7 - [PowerPC] Duplicate inherited heuristic from base scheduler

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 21 18:11:13 PST 2021


Author: Qiu Chaofan
Date: 2021-01-22T10:11:03+08:00
New Revision: 449f2f7140e1d70d9c08bb609cde6cdd144c6035

URL: https://github.com/llvm/llvm-project/commit/449f2f7140e1d70d9c08bb609cde6cdd144c6035
DIFF: https://github.com/llvm/llvm-project/commit/449f2f7140e1d70d9c08bb609cde6cdd144c6035.diff

LOG: [PowerPC] Duplicate inherited heuristic from base scheduler

PowerPC has its own custom scheduler heuristics. The overriding versions of
tryCandidate call the parent classes' tryCandidate, but that function returns
void, so calling it this way doesn't actually help. This patch duplicates the
code from the base schedulers into the PPC machine scheduler classes, which
does what we wanted.

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D94464

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
    llvm/test/CodeGen/PowerPC/botheightreduce.mir
    llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
    llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
    llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
    llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll
    llvm/test/CodeGen/PowerPC/sched-addi.ll
    llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
    llvm/test/CodeGen/PowerPC/sms-phi-1.ll
    llvm/test/CodeGen/PowerPC/sms-simple.ll
    llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5649d7d13966..ce615e554d94 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -49,10 +49,103 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
 void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
-  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+  // From GenericScheduler::tryCandidate
 
-  if (!Cand.isValid() || !Zone)
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
     return;
+  }
+
+  // Bias PhysReg Defs and copies to their uses and defined respectively.
+  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+    return;
+
+  // Avoid exceeding the target's limit.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+                  RegExcess, TRI, DAG->MF))
+    return;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+                  TryCand, Cand, RegCritical, TRI, DAG->MF))
+    return;
+
+  // We only compare a subset of features when comparing nodes between
+  // Top and Bottom boundary. Some properties are simply incomparable, in many
+  // other instances we should only override the other boundary if something
+  // is a clear good pick on one boundary. Skip heuristics that are more
+  // "tie-breaking" in nature.
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
+    // heuristics to take precedence.
+    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+        tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return;
+  }
+
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  //
+  // This is a best effort to set things up for a post-RA pass. Optimizations
+  // like generating loads of multiple registers should ideally be done within
+  // the scheduler pass by combining the loads during DAG postprocessing.
+  const SUnit *CandNextClusterSU =
+      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
+    return;
+
+  if (SameBoundary) {
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
+      return;
+  }
+
+  // Avoid increasing the max pressure of the entire region.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
+                  Cand, RegMax, TRI, DAG->MF))
+    return;
+
+  if (SameBoundary) {
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources, TryCand, Cand,
+                   ResourceDemand))
+      return;
+
+    // Avoid serializing long latency dependence chains.
+    // For acyclic path limited loops, latency was already checked above.
+    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Fall through to original instruction order.
+    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
+        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+      TryCand.Reason = NodeOrder;
+    }
+  }
+
+  // GenericScheduler::tryCandidate end
 
   // Add powerpc specific heuristic only when TryCand isn't selected or
   // selected as node order.
@@ -61,8 +154,10 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
 
   // There are some benefits to schedule the ADDI before the load to hide the
   // latency, as RA may create a true dependency between the load and addi.
-  if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
-    return;
+  if (SameBoundary) {
+    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+      return;
+  }
 }
 
 bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
@@ -79,11 +174,44 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
 
 void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                           SchedCandidate &TryCand) {
-  PostGenericScheduler::tryCandidate(Cand, TryCand);
+  // From PostGenericScheduler::tryCandidate
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+    return;
 
-  if (!Cand.isValid())
+  // Keep clustered nodes together.
+  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
     return;
 
+  // Avoid critical resource consumption and balance the schedule.
+  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+              TryCand, Cand, ResourceReduce))
+    return;
+  if (tryGreater(TryCand.ResDelta.DemandedResources,
+                 Cand.ResDelta.DemandedResources, TryCand, Cand,
+                 ResourceDemand))
+    return;
+
+  // Avoid serializing long latency dependence chains.
+  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+    return;
+  }
+
+  // Fall through to original instruction order.
+  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+    TryCand.Reason = NodeOrder;
+
+  // PostGenericScheduler::tryCandidate end
+
   // Add powerpc post ra specific heuristic only when TryCand isn't selected or
   // selected as node order.
   if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)

diff  --git a/llvm/test/CodeGen/PowerPC/botheightreduce.mir b/llvm/test/CodeGen/PowerPC/botheightreduce.mir
index 72b030273e82..7a2220cda31c 100644
--- a/llvm/test/CodeGen/PowerPC/botheightreduce.mir
+++ b/llvm/test/CodeGen/PowerPC/botheightreduce.mir
@@ -26,7 +26,6 @@ body: |
   ; CHECK:   [[LI8_6:%[0-9]+]]:g8rc = LI8 7
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1
   ; CHECK:   [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load 8)
   ; CHECK:   [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load 8)
   ; CHECK:   [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load 8)
@@ -34,9 +33,10 @@ body: |
   ; CHECK:   [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load 8)
   ; CHECK:   [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load 8)
   ; CHECK:   [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load 8)
-  ; CHECK:   [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8)
-  ; CHECK:   [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]]
   ; CHECK:   [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load 8)
+  ; CHECK:   [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]]
+  ; CHECK:   [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8)
+  ; CHECK:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1
   ; CHECK:   [[MULLD1:%[0-9]+]]:g8rc = MULLD [[MULLD]], [[LDX5]]
   ; CHECK:   [[MULLD2:%[0-9]+]]:g8rc = MULLD [[MULLD1]], [[LDX1]]
   ; CHECK:   [[MULLD3:%[0-9]+]]:g8rc = MULLD [[MULLD2]], [[LD1]]

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index 4d339e2383b3..206720f0ecb9 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -116,15 +116,14 @@ define i64 @test_ds_prep(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_2:
-; CHECK-NEXT:    ldx r9, r6, r7
-; CHECK-NEXT:    ld r10, 0(r6)
-; CHECK-NEXT:    ldx r11, r6, r5
-; CHECK-NEXT:    addi r8, r6, 1
-; CHECK-NEXT:    ld r6, 4(r6)
-; CHECK-NEXT:    mulld r9, r10, r9
-; CHECK-NEXT:    mulld r9, r9, r11
-; CHECK-NEXT:    maddld r3, r9, r6, r3
-; CHECK-NEXT:    mr r6, r8
+; CHECK-NEXT:    ldx r8, r6, r7
+; CHECK-NEXT:    ld r9, 0(r6)
+; CHECK-NEXT:    ldx r10, r6, r5
+; CHECK-NEXT:    ld r11, 4(r6)
+; CHECK-NEXT:    addi r6, r6, 1
+; CHECK-NEXT:    mulld r8, r9, r8
+; CHECK-NEXT:    mulld r8, r8, r10
+; CHECK-NEXT:    maddld r3, r8, r11, r3
 ; CHECK-NEXT:    bdnz .LBB1_2
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    add r3, r3, r4
@@ -217,25 +216,24 @@ define i64 @test_max_number_reminder(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:    ldx r12, r9, r6
-; CHECK-NEXT:    ld r0, 0(r9)
-; CHECK-NEXT:    ldx r30, r9, r5
-; CHECK-NEXT:    ldx r29, r9, r7
-; CHECK-NEXT:    addi r11, r9, 1
-; CHECK-NEXT:    mulld r12, r0, r12
-; CHECK-NEXT:    ld r28, 4(r9)
-; CHECK-NEXT:    ldx r27, r9, r8
-; CHECK-NEXT:    ld r26, 12(r9)
-; CHECK-NEXT:    ld r25, 8(r9)
-; CHECK-NEXT:    ldx r9, r9, r10
-; CHECK-NEXT:    mulld r12, r12, r30
-; CHECK-NEXT:    mulld r12, r12, r29
-; CHECK-NEXT:    mulld r12, r12, r28
-; CHECK-NEXT:    mulld r12, r12, r27
-; CHECK-NEXT:    mulld r12, r12, r26
-; CHECK-NEXT:    mulld r12, r12, r25
-; CHECK-NEXT:    maddld r3, r12, r9, r3
-; CHECK-NEXT:    mr r9, r11
+; CHECK-NEXT:    ldx r11, r9, r6
+; CHECK-NEXT:    ld r12, 0(r9)
+; CHECK-NEXT:    ldx r0, r9, r5
+; CHECK-NEXT:    ldx r30, r9, r7
+; CHECK-NEXT:    mulld r11, r12, r11
+; CHECK-NEXT:    ld r29, 4(r9)
+; CHECK-NEXT:    ldx r28, r9, r8
+; CHECK-NEXT:    ld r27, 12(r9)
+; CHECK-NEXT:    ld r26, 8(r9)
+; CHECK-NEXT:    ldx r25, r9, r10
+; CHECK-NEXT:    addi r9, r9, 1
+; CHECK-NEXT:    mulld r11, r11, r0
+; CHECK-NEXT:    mulld r11, r11, r30
+; CHECK-NEXT:    mulld r11, r11, r29
+; CHECK-NEXT:    mulld r11, r11, r28
+; CHECK-NEXT:    mulld r11, r11, r27
+; CHECK-NEXT:    mulld r11, r11, r26
+; CHECK-NEXT:    maddld r3, r11, r25, r3
 ; CHECK-NEXT:    bdnz .LBB2_2
 ; CHECK-NEXT:    b .LBB2_4
 ; CHECK-NEXT:  .LBB2_3:
@@ -624,10 +622,10 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    beq cr0, .LBB6_8
 ; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cmpldi r4, 1
 ; CHECK-NEXT:    li r7, 1
 ; CHECK-NEXT:    addi r6, r3, 4009
-; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
 ; CHECK-NEXT:    iselgt r8, r4, r7
 ; CHECK-NEXT:    lis r4, -21846
@@ -639,11 +637,11 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    li r30, 1
 ; CHECK-NEXT:    ld r5, 0(r5)
 ; CHECK-NEXT:    mtctr r8
-; CHECK-NEXT:    li r8, -9
-; CHECK-NEXT:    addi r5, r5, -1
 ; CHECK-NEXT:    ori r4, r4, 43691
+; CHECK-NEXT:    li r8, -9
 ; CHECK-NEXT:    li r29, 1
 ; CHECK-NEXT:    li r28, 1
+; CHECK-NEXT:    addi r5, r5, -1
 ; CHECK-NEXT:    b .LBB6_4
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB6_2:
@@ -652,8 +650,8 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    ld r0, -8(r6)
 ; CHECK-NEXT:    add r29, r0, r29
 ; CHECK-NEXT:  .LBB6_3:
-; CHECK-NEXT:    addi r6, r6, 1
 ; CHECK-NEXT:    mulld r0, r29, r28
+; CHECK-NEXT:    addi r6, r6, 1
 ; CHECK-NEXT:    mulld r0, r0, r30
 ; CHECK-NEXT:    mulld r0, r0, r12
 ; CHECK-NEXT:    mulld r0, r0, r11
@@ -802,8 +800,8 @@ define float @test_ds_float(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    cmpwi r4, 1
 ; CHECK-NEXT:    blt cr0, .LBB7_4
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi r3, r3, 4002
 ; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    addi r3, r3, 4002
 ; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    mtctr r4
 ; CHECK-NEXT:    li r4, -1
@@ -884,8 +882,8 @@ define float @test_ds_combine_float_int(i8* %0, i32 signext %1) {
 ; CHECK-NEXT:    cmpwi r4, 1
 ; CHECK-NEXT:    blt cr0, .LBB8_4
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi r3, r3, 4002
 ; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    addi r3, r3, 4002
 ; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    mtctr r4
 ; CHECK-NEXT:    li r4, -1

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
index 8d96a784f2bf..05f8394352ca 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
@@ -14,9 +14,9 @@
 define void @foo(float* nocapture %data, float %d) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpspn 0, 1
 ; CHECK-NEXT:    li 5, 83
 ; CHECK-NEXT:    addi 4, 3, 192
-; CHECK-NEXT:    xscvdpspn 0, 1
 ; CHECK-NEXT:    mtctr 5
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    .p2align 4

diff  --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index fdc0257e4c5a..82e86c5761ef 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -400,9 +400,9 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-NEXT:  .LBB9_2: # %for.body
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    rldic r7, r6, 4, 28
-; CHECK-NEXT:    addi r6, r6, 6
 ; CHECK-NEXT:    xxsetaccz acc2
 ; CHECK-NEXT:    xxsetaccz acc1
+; CHECK-NEXT:    addi r6, r6, 6
 ; CHECK-NEXT:    lxvx vs0, r5, r7
 ; CHECK-NEXT:    add r7, r5, r7
 ; CHECK-NEXT:    lxv vs1, 16(r7)
@@ -414,8 +414,8 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-NEXT:    lxv vs12, 64(r7)
 ; CHECK-NEXT:    lxv vs13, 80(r7)
 ; CHECK-NEXT:    rldic r7, r4, 6, 26
-; CHECK-NEXT:    addi r4, r4, 3
 ; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    addi r4, r4, 3
 ; CHECK-NEXT:    xxmfacc acc1
 ; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
 ; CHECK-NEXT:    stxvx vs11, r3, r7
@@ -449,9 +449,9 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-BE-NEXT:  .LBB9_2: # %for.body
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
-; CHECK-BE-NEXT:    addi r6, r6, 6
 ; CHECK-BE-NEXT:    xxsetaccz acc2
 ; CHECK-BE-NEXT:    xxsetaccz acc1
+; CHECK-BE-NEXT:    addi r6, r6, 6
 ; CHECK-BE-NEXT:    lxvx vs0, r5, r7
 ; CHECK-BE-NEXT:    add r7, r5, r7
 ; CHECK-BE-NEXT:    lxv vs1, 16(r7)
@@ -463,8 +463,8 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-BE-NEXT:    lxv vs12, 64(r7)
 ; CHECK-BE-NEXT:    lxv vs13, 80(r7)
 ; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
-; CHECK-BE-NEXT:    addi r4, r4, 3
 ; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    addi r4, r4, 3
 ; CHECK-BE-NEXT:    xxmfacc acc1
 ; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
 ; CHECK-BE-NEXT:    stxvx vs8, r3, r7
@@ -544,8 +544,7 @@ for.body:                                         ; preds = %for.body, %for.body
 declare i32 @testRedundantPrimeUnprimeF()
 define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind {
 ; CHECK-LABEL: testRedundantPrimeUnprime:
-; CHECK:         .localentry testRedundantPrimeUnprime, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r0, 16(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
index d875fe4b7c4a..d5a3cc963f5f 100644
--- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -17,17 +17,17 @@ define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
 ; CHECK-NEXT:    xxsetaccz acc0
 ; CHECK-NEXT:    blt cr0, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    clrldi r6, r5, 32
-; CHECK-NEXT:    addi r5, r4, 32
-; CHECK-NEXT:    addi r6, r6, -2
+; CHECK-NEXT:    clrldi r5, r5, 32
 ; CHECK-NEXT:    lxv vs4, 0(r4)
 ; CHECK-NEXT:    lxv vs5, 16(r4)
-; CHECK-NEXT:    mtctr r6
+; CHECK-NEXT:    addi r4, r4, 32
+; CHECK-NEXT:    addi r5, r5, -2
+; CHECK-NEXT:    mtctr r5
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lxv vs6, 0(r5)
-; CHECK-NEXT:    addi r5, r5, 16
+; CHECK-NEXT:    lxv vs6, 0(r4)
+; CHECK-NEXT:    addi r4, r4, 16
 ; CHECK-NEXT:    xvf64gerpp acc0, vsp4, vs6
 ; CHECK-NEXT:    bdnz .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: # %for.cond.cleanup
@@ -44,17 +44,17 @@ define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
 ; CHECK-BE-NEXT:    xxsetaccz acc0
 ; CHECK-BE-NEXT:    blt cr0, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-BE-NEXT:    clrldi r6, r5, 32
-; CHECK-BE-NEXT:    addi r5, r4, 32
-; CHECK-BE-NEXT:    addi r6, r6, -2
+; CHECK-BE-NEXT:    clrldi r5, r5, 32
 ; CHECK-BE-NEXT:    lxv vs4, 0(r4)
 ; CHECK-BE-NEXT:    lxv vs5, 16(r4)
-; CHECK-BE-NEXT:    mtctr r6
+; CHECK-BE-NEXT:    addi r4, r4, 32
+; CHECK-BE-NEXT:    addi r5, r5, -2
+; CHECK-BE-NEXT:    mtctr r5
 ; CHECK-BE-NEXT:    .p2align 4
 ; CHECK-BE-NEXT:  .LBB0_2: # %for.body
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    lxv vs6, 0(r5)
-; CHECK-BE-NEXT:    addi r5, r5, 16
+; CHECK-BE-NEXT:    lxv vs6, 0(r4)
+; CHECK-BE-NEXT:    addi r4, r4, 16
 ; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp4, vs6
 ; CHECK-BE-NEXT:    bdnz .LBB0_2
 ; CHECK-BE-NEXT:  .LBB0_3: # %for.cond.cleanup
@@ -115,8 +115,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
 ; CHECK-NEXT:    xvf64ger acc0, vsp4, vs6
 ; CHECK-NEXT:    blt cr0, .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    addi r4, r4, 48
 ; CHECK-NEXT:    clrldi r5, r5, 32
+; CHECK-NEXT:    addi r4, r4, 48
 ; CHECK-NEXT:    addi r5, r5, -3
 ; CHECK-NEXT:    mtctr r5
 ; CHECK-NEXT:    .p2align 4
@@ -143,8 +143,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
 ; CHECK-BE-NEXT:    xvf64ger acc0, vsp4, vs6
 ; CHECK-BE-NEXT:    blt cr0, .LBB1_3
 ; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-BE-NEXT:    addi r4, r4, 48
 ; CHECK-BE-NEXT:    clrldi r5, r5, 32
+; CHECK-BE-NEXT:    addi r4, r4, 48
 ; CHECK-BE-NEXT:    addi r5, r5, -3
 ; CHECK-BE-NEXT:    mtctr r5
 ; CHECK-BE-NEXT:    .p2align 4

diff  --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index dd4212569c13..039f33a3b117 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -9,10 +9,9 @@ target triple = "powerpc64le-unknown-linux-gnu"
 
 define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
 ; CHECK-LABEL: foo:
-; CHECK:         .localentry foo, 1
-; CHECK-NEXT:  # %bb.0: # %entry
-; CHECK-NEXT:    stdu 1, -448(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 448
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stdu 1, -480(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 480
 ; CHECK-NEXT:    .cfi_offset r14, -256
 ; CHECK-NEXT:    .cfi_offset r15, -248
 ; CHECK-NEXT:    .cfi_offset r16, -240
@@ -46,308 +45,318 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    .cfi_offset f30, -16
 ; CHECK-NEXT:    .cfi_offset f31, -8
 ; CHECK-NEXT:    lwz 4, 0(4)
-; CHECK-NEXT:    std 14, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 14, 224(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 232(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    cmpwi 4, 1
-; CHECK-NEXT:    std 16, 208(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 18, 224(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 232(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 20, 240(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 248(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 22, 256(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, 264(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, 272(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 280(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, 288(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 27, 296(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, 304(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 312(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 30, 320(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 31, 328(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 18, 336(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 19, 344(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 20, 352(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 21, 360(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 22, 368(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 23, 376(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 24, 384(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 25, 392(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 26, 400(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 27, 408(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 28, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 29, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 30, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 31, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 16, 240(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 248(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 18, 256(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 19, 264(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 272(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 280(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, 288(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 296(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 304(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, 312(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 320(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 328(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 28, 336(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 344(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 352(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 31, 360(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 18, 368(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 19, 376(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 20, 384(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 21, 392(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 22, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 23, 408(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 24, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 25, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 26, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 27, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 28, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 29, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 30, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, 472(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
-; CHECK-NEXT:    lwz 3, 0(3)
-; CHECK-NEXT:    cmpwi 3, 1
+; CHECK-NEXT:    mr 23, 5
+; CHECK-NEXT:    lwz 5, 0(3)
+; CHECK-NEXT:    cmpwi 5, 1
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
-; CHECK-NEXT:    addi 3, 3, 1
-; CHECK-NEXT:    mr 24, 5
-; CHECK-NEXT:    li 5, 9
-; CHECK-NEXT:    mr 11, 7
-; CHECK-NEXT:    ld 12, 640(1)
-; CHECK-NEXT:    std 9, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 10, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT:    mr 7, 6
-; CHECK-NEXT:    ld 6, 544(1)
-; CHECK-NEXT:    lxv 1, 0(9)
-; CHECK-NEXT:    ld 9, 648(1)
-; CHECK-NEXT:    ld 29, 688(1)
-; CHECK-NEXT:    ld 28, 680(1)
-; CHECK-NEXT:    ld 2, 632(1)
-; CHECK-NEXT:    ld 26, 624(1)
-; CHECK-NEXT:    lxv 0, 0(10)
-; CHECK-NEXT:    cmpldi 3, 9
-; CHECK-NEXT:    lxv 4, 0(8)
-; CHECK-NEXT:    ld 30, 664(1)
-; CHECK-NEXT:    ld 10, 704(1)
-; CHECK-NEXT:    ld 27, 672(1)
-; CHECK-NEXT:    ld 25, 616(1)
-; CHECK-NEXT:    ld 23, 608(1)
-; CHECK-NEXT:    ld 22, 600(1)
-; CHECK-NEXT:    ld 21, 592(1)
-; CHECK-NEXT:    ld 19, 584(1)
-; CHECK-NEXT:    ld 17, 576(1)
-; CHECK-NEXT:    iselgt 3, 3, 5
-; CHECK-NEXT:    ld 5, 656(1)
-; CHECK-NEXT:    addi 3, 3, -2
-; CHECK-NEXT:    lwa 20, 0(11)
-; CHECK-NEXT:    lxv 13, 0(12)
-; CHECK-NEXT:    std 6, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 27, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 2, 0(6)
-; CHECK-NEXT:    ld 6, 696(1)
-; CHECK-NEXT:    lxv 34, 0(2)
-; CHECK-NEXT:    lxv 7, 0(29)
-; CHECK-NEXT:    lxv 39, 0(17)
-; CHECK-NEXT:    lxv 38, 0(19)
-; CHECK-NEXT:    lxv 33, 0(21)
-; CHECK-NEXT:    lxv 32, 0(22)
-; CHECK-NEXT:    lxv 37, 0(23)
-; CHECK-NEXT:    lxv 36, 0(25)
-; CHECK-NEXT:    lxv 35, 0(26)
-; CHECK-NEXT:    lxv 11, 0(9)
-; CHECK-NEXT:    lxv 12, 0(30)
-; CHECK-NEXT:    rldicl 3, 3, 61, 3
-; CHECK-NEXT:    addi 0, 3, 1
-; CHECK-NEXT:    ld 3, 560(1)
-; CHECK-NEXT:    sldi 11, 20, 2
-; CHECK-NEXT:    lxv 9, 0(5)
-; CHECK-NEXT:    lxv 10, 0(27)
-; CHECK-NEXT:    lxv 8, 0(28)
-; CHECK-NEXT:    lxv 6, 0(6)
-; CHECK-NEXT:    lxv 5, 0(10)
-; CHECK-NEXT:    lxv 3, 0(3)
-; CHECK-NEXT:    std 3, 96(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 12, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT:    sldi 3, 20, 4
-; CHECK-NEXT:    add 12, 20, 11
-; CHECK-NEXT:    std 8, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 6, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT:    ld 8, 552(1)
-; CHECK-NEXT:    sldi 18, 20, 1
+; CHECK-NEXT:    addi 5, 5, 1
+; CHECK-NEXT:    li 20, 9
+; CHECK-NEXT:    ld 28, 728(1)
+; CHECK-NEXT:    ld 19, 616(1)
+; CHECK-NEXT:    lwa 3, 0(7)
+; CHECK-NEXT:    ld 7, 688(1)
+; CHECK-NEXT:    ld 12, 680(1)
+; CHECK-NEXT:    ld 11, 672(1)
+; CHECK-NEXT:    ld 2, 664(1)
+; CHECK-NEXT:    ld 29, 736(1)
+; CHECK-NEXT:    cmpldi 5, 9
+; CHECK-NEXT:    ld 27, 720(1)
+; CHECK-NEXT:    ld 26, 712(1)
+; CHECK-NEXT:    ld 25, 704(1)
+; CHECK-NEXT:    ld 24, 696(1)
+; CHECK-NEXT:    iselgt 5, 5, 20
+; CHECK-NEXT:    ld 30, 656(1)
+; CHECK-NEXT:    ld 22, 648(1)
+; CHECK-NEXT:    ld 21, 640(1)
+; CHECK-NEXT:    ld 20, 632(1)
+; CHECK-NEXT:    ld 18, 608(1)
+; CHECK-NEXT:    ld 17, 600(1)
+; CHECK-NEXT:    ld 16, 592(1)
+; CHECK-NEXT:    ld 14, 584(1)
+; CHECK-NEXT:    sldi 0, 3, 2
+; CHECK-NEXT:    std 5, 216(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 28, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 5, 4
+; CHECK-NEXT:    ld 4, 624(1)
+; CHECK-NEXT:    std 19, 96(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 4, 104(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 11, 0(4)
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    ld 5, 216(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 15, 576(1)
+; CHECK-NEXT:    sldi 31, 3, 1
+; CHECK-NEXT:    std 8, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 9, 40(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    lxv 41, 0(8)
-; CHECK-NEXT:    add 3, 3, 24
-; CHECK-NEXT:    addi 16, 3, 32
-; CHECK-NEXT:    sldi 3, 20, 3
-; CHECK-NEXT:    std 9, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 5, 120(1) # 8-byte Folded Spill
-; CHECK-NEXT:    sldi 5, 12, 3
-; CHECK-NEXT:    std 26, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 2, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT:    add 2, 24, 5
-; CHECK-NEXT:    mr 9, 30
-; CHECK-NEXT:    li 26, 1
-; CHECK-NEXT:    add 3, 3, 24
-; CHECK-NEXT:    addi 31, 3, 32
-; CHECK-NEXT:    ld 3, 568(1)
-; CHECK-NEXT:    std 28, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT:    sldi 5, 20, 5
-; CHECK-NEXT:    add 29, 20, 18
-; CHECK-NEXT:    std 23, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    mulli 27, 20, 48
-; CHECK-NEXT:    add 30, 24, 5
-; CHECK-NEXT:    li 25, 0
-; CHECK-NEXT:    lxv 40, 0(3)
-; CHECK-NEXT:    mulli 23, 20, 6
-; CHECK-NEXT:    sldi 5, 29, 3
-; CHECK-NEXT:    add 28, 24, 5
-; CHECK-NEXT:    mr 5, 24
-; CHECK-NEXT:    std 17, 32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 22, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 8, 6
+; CHECK-NEXT:    sldi 6, 3, 3
+; CHECK-NEXT:    std 2, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 11, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 3, 0(2)
+; CHECK-NEXT:    lxv 2, 0(11)
+; CHECK-NEXT:    lxv 0, 0(7)
+; CHECK-NEXT:    add 6, 6, 23
+; CHECK-NEXT:    lxv 7, 0(28)
+; CHECK-NEXT:    add 28, 3, 31
+; CHECK-NEXT:    lxv 40, 0(9)
+; CHECK-NEXT:    lxv 39, 0(10)
+; CHECK-NEXT:    lxv 38, 0(15)
+; CHECK-NEXT:    lxv 33, 0(14)
+; CHECK-NEXT:    lxv 32, 0(16)
+; CHECK-NEXT:    lxv 37, 0(17)
+; CHECK-NEXT:    lxv 35, 0(18)
+; CHECK-NEXT:    lxv 13, 0(19)
+; CHECK-NEXT:    lxv 10, 0(20)
+; CHECK-NEXT:    lxv 8, 0(21)
+; CHECK-NEXT:    lxv 6, 0(22)
+; CHECK-NEXT:    lxv 4, 0(30)
+; CHECK-NEXT:    lxv 1, 0(12)
+; CHECK-NEXT:    lxv 36, 0(24)
+; CHECK-NEXT:    lxv 34, 0(25)
+; CHECK-NEXT:    lxv 12, 0(26)
+; CHECK-NEXT:    lxv 9, 0(27)
+; CHECK-NEXT:    lxv 5, 0(29)
+; CHECK-NEXT:    addi 5, 5, -2
+; CHECK-NEXT:    sldi 11, 3, 4
+; CHECK-NEXT:    std 12, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 7, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT:    add 7, 3, 0
+; CHECK-NEXT:    add 12, 11, 23
+; CHECK-NEXT:    addi 11, 6, 32
+; CHECK-NEXT:    addi 12, 12, 32
+; CHECK-NEXT:    std 22, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mulli 26, 3, 48
+; CHECK-NEXT:    mulli 22, 3, 6
+; CHECK-NEXT:    sldi 6, 7, 3
+; CHECK-NEXT:    add 30, 23, 6
+; CHECK-NEXT:    std 29, 216(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT:    li 25, 1
+; CHECK-NEXT:    li 24, 0
+; CHECK-NEXT:    std 10, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 14, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 16, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 18, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    rldicl 5, 5, 61, 3
+; CHECK-NEXT:    addi 2, 5, 1
+; CHECK-NEXT:    sldi 5, 3, 5
+; CHECK-NEXT:    add 29, 23, 5
+; CHECK-NEXT:    sldi 5, 28, 3
+; CHECK-NEXT:    add 27, 23, 5
+; CHECK-NEXT:    mr 5, 23
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_3: # %_loop_2_do_.lr.ph
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
-; CHECK-NEXT:    maddld 6, 23, 25, 12
-; CHECK-NEXT:    maddld 21, 23, 25, 11
-; CHECK-NEXT:    mtctr 0
+; CHECK-NEXT:    maddld 6, 22, 24, 7
+; CHECK-NEXT:    maddld 20, 22, 24, 0
+; CHECK-NEXT:    mtctr 2
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 22, 24, 6
-; CHECK-NEXT:    sldi 6, 21, 3
-; CHECK-NEXT:    add 21, 24, 6
-; CHECK-NEXT:    maddld 6, 23, 25, 29
+; CHECK-NEXT:    add 21, 23, 6
+; CHECK-NEXT:    sldi 6, 20, 3
+; CHECK-NEXT:    add 20, 23, 6
+; CHECK-NEXT:    maddld 6, 22, 24, 28
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 19, 24, 6
-; CHECK-NEXT:    maddld 6, 23, 25, 18
+; CHECK-NEXT:    add 19, 23, 6
+; CHECK-NEXT:    maddld 6, 22, 24, 31
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 17, 24, 6
-; CHECK-NEXT:    maddld 6, 23, 25, 20
+; CHECK-NEXT:    add 18, 23, 6
+; CHECK-NEXT:    maddld 6, 22, 24, 3
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 15, 24, 6
-; CHECK-NEXT:    mulld 6, 23, 25
+; CHECK-NEXT:    add 17, 23, 6
+; CHECK-NEXT:    mulld 6, 22, 24
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 14, 24, 6
-; CHECK-NEXT:    mr 6, 7
+; CHECK-NEXT:    add 16, 23, 6
+; CHECK-NEXT:    mr 6, 8
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lxvp 42, 0(6)
-; CHECK-NEXT:    lxvp 44, 0(14)
-; CHECK-NEXT:    lxvp 46, 0(15)
-; CHECK-NEXT:    lxvp 48, 0(17)
+; CHECK-NEXT:    lxvp 44, 0(16)
+; CHECK-NEXT:    lxvp 46, 0(17)
+; CHECK-NEXT:    lxvp 48, 0(18)
 ; CHECK-NEXT:    lxvp 50, 0(19)
-; CHECK-NEXT:    lxvp 30, 0(21)
-; CHECK-NEXT:    lxvp 28, 0(22)
+; CHECK-NEXT:    lxvp 30, 0(20)
+; CHECK-NEXT:    lxvp 28, 0(21)
 ; CHECK-NEXT:    lxvp 26, 32(6)
-; CHECK-NEXT:    lxvp 24, 32(14)
-; CHECK-NEXT:    lxvp 22, 32(15)
-; CHECK-NEXT:    lxvp 20, 32(17)
+; CHECK-NEXT:    lxvp 24, 32(16)
+; CHECK-NEXT:    lxvp 22, 32(17)
+; CHECK-NEXT:    lxvp 20, 32(18)
 ; CHECK-NEXT:    lxvp 18, 32(19)
 ; CHECK-NEXT:    addi 6, 6, 64
-; CHECK-NEXT:    addi 14, 14, 64
-; CHECK-NEXT:    addi 15, 15, 64
+; CHECK-NEXT:    addi 16, 16, 64
 ; CHECK-NEXT:    addi 17, 17, 64
+; CHECK-NEXT:    addi 18, 18, 64
 ; CHECK-NEXT:    addi 19, 19, 64
-; CHECK-NEXT:    xvmaddadp 4, 45, 43
-; CHECK-NEXT:    xvmaddadp 1, 47, 43
-; CHECK-NEXT:    xvmaddadp 0, 49, 43
-; CHECK-NEXT:    xvmaddadp 2, 51, 43
-; CHECK-NEXT:    xvmaddadp 41, 31, 43
-; CHECK-NEXT:    xvmaddadp 3, 29, 43
-; CHECK-NEXT:    xvmaddadp 40, 44, 42
-; CHECK-NEXT:    xvmaddadp 39, 46, 42
-; CHECK-NEXT:    xvmaddadp 38, 48, 42
-; CHECK-NEXT:    xvmaddadp 33, 50, 42
-; CHECK-NEXT:    xvmaddadp 32, 30, 42
-; CHECK-NEXT:    xvmaddadp 37, 28, 42
-; CHECK-NEXT:    lxvp 42, 32(21)
-; CHECK-NEXT:    lxvp 44, 32(22)
+; CHECK-NEXT:    xvmaddadp 41, 45, 43
+; CHECK-NEXT:    xvmaddadp 40, 47, 43
+; CHECK-NEXT:    xvmaddadp 39, 49, 43
+; CHECK-NEXT:    xvmaddadp 38, 51, 43
+; CHECK-NEXT:    xvmaddadp 33, 31, 43
+; CHECK-NEXT:    xvmaddadp 32, 29, 43
+; CHECK-NEXT:    xvmaddadp 37, 44, 42
+; CHECK-NEXT:    xvmaddadp 35, 46, 42
+; CHECK-NEXT:    xvmaddadp 13, 48, 42
+; CHECK-NEXT:    xvmaddadp 11, 50, 42
+; CHECK-NEXT:    xvmaddadp 10, 30, 42
+; CHECK-NEXT:    xvmaddadp 8, 28, 42
+; CHECK-NEXT:    lxvp 42, 32(20)
+; CHECK-NEXT:    lxvp 44, 32(21)
+; CHECK-NEXT:    addi 20, 20, 64
 ; CHECK-NEXT:    addi 21, 21, 64
-; CHECK-NEXT:    addi 22, 22, 64
-; CHECK-NEXT:    xvmaddadp 36, 25, 27
-; CHECK-NEXT:    xvmaddadp 35, 23, 27
-; CHECK-NEXT:    xvmaddadp 34, 21, 27
-; CHECK-NEXT:    xvmaddadp 13, 19, 27
-; CHECK-NEXT:    xvmaddadp 12, 24, 26
-; CHECK-NEXT:    xvmaddadp 10, 22, 26
-; CHECK-NEXT:    xvmaddadp 8, 20, 26
-; CHECK-NEXT:    xvmaddadp 7, 18, 26
-; CHECK-NEXT:    xvmaddadp 11, 43, 27
-; CHECK-NEXT:    xvmaddadp 9, 45, 27
-; CHECK-NEXT:    xvmaddadp 6, 42, 26
+; CHECK-NEXT:    xvmaddadp 6, 25, 27
+; CHECK-NEXT:    xvmaddadp 4, 23, 27
+; CHECK-NEXT:    xvmaddadp 3, 21, 27
+; CHECK-NEXT:    xvmaddadp 2, 19, 27
+; CHECK-NEXT:    xvmaddadp 36, 24, 26
+; CHECK-NEXT:    xvmaddadp 34, 22, 26
+; CHECK-NEXT:    xvmaddadp 12, 20, 26
+; CHECK-NEXT:    xvmaddadp 9, 18, 26
+; CHECK-NEXT:    xvmaddadp 1, 43, 27
+; CHECK-NEXT:    xvmaddadp 0, 45, 27
+; CHECK-NEXT:    xvmaddadp 7, 42, 26
 ; CHECK-NEXT:    xvmaddadp 5, 44, 26
 ; CHECK-NEXT:    bdnz .LBB0_4
 ; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    addi 26, 26, 6
-; CHECK-NEXT:    add 5, 5, 27
-; CHECK-NEXT:    add 31, 31, 27
-; CHECK-NEXT:    add 2, 2, 27
-; CHECK-NEXT:    add 16, 16, 27
-; CHECK-NEXT:    add 30, 30, 27
-; CHECK-NEXT:    add 28, 28, 27
-; CHECK-NEXT:    addi 25, 25, 1
-; CHECK-NEXT:    cmpld 26, 4
+; CHECK-NEXT:    addi 25, 25, 6
+; CHECK-NEXT:    add 5, 5, 26
+; CHECK-NEXT:    add 11, 11, 26
+; CHECK-NEXT:    add 30, 30, 26
+; CHECK-NEXT:    add 12, 12, 26
+; CHECK-NEXT:    add 29, 29, 26
+; CHECK-NEXT:    add 27, 27, 26
+; CHECK-NEXT:    addi 24, 24, 1
+; CHECK-NEXT:    cmpld 25, 4
 ; CHECK-NEXT:    ble 0, .LBB0_3
 ; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
-; CHECK-NEXT:    ld 4, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 4, 0(4)
-; CHECK-NEXT:    ld 4, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 1, 0(4)
-; CHECK-NEXT:    ld 4, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 0, 0(4)
-; CHECK-NEXT:    ld 4, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 2, 0(4)
-; CHECK-NEXT:    ld 4, 96(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 41, 0(8)
-; CHECK-NEXT:    stxv 3, 0(4)
-; CHECK-NEXT:    stxv 40, 0(3)
 ; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 39, 0(3)
+; CHECK-NEXT:    stxv 41, 0(3)
 ; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 38, 0(3)
+; CHECK-NEXT:    stxv 40, 0(3)
 ; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 33, 0(3)
+; CHECK-NEXT:    stxv 39, 0(3)
 ; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 32, 0(3)
+; CHECK-NEXT:    stxv 38, 0(3)
 ; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 37, 0(3)
+; CHECK-NEXT:    stxv 33, 0(3)
 ; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 36, 0(3)
+; CHECK-NEXT:    stxv 32, 0(3)
 ; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 35, 0(3)
+; CHECK-NEXT:    stxv 37, 0(3)
 ; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 34, 0(3)
-; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 35, 0(3)
+; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 13, 0(3)
-; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 11, 0(3)
+; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 10, 0(3)
 ; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 9, 0(3)
+; CHECK-NEXT:    stxv 8, 0(3)
+; CHECK-NEXT:    ld 3, 128(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 6, 0(3)
 ; CHECK-NEXT:    ld 3, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 12, 0(9)
-; CHECK-NEXT:    stxv 10, 0(3)
+; CHECK-NEXT:    stxv 4, 0(3)
 ; CHECK-NEXT:    ld 3, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 8, 0(3)
+; CHECK-NEXT:    stxv 3, 0(3)
 ; CHECK-NEXT:    ld 3, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 7, 0(3)
+; CHECK-NEXT:    stxv 2, 0(3)
 ; CHECK-NEXT:    ld 3, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 6, 0(3)
-; CHECK-NEXT:    stxv 5, 0(10)
+; CHECK-NEXT:    stxv 1, 0(3)
+; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 0, 0(3)
+; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 36, 0(3)
+; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 34, 0(3)
+; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 12, 0(3)
+; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 9, 0(3)
+; CHECK-NEXT:    ld 3, 208(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 7, 0(3)
+; CHECK-NEXT:    ld 3, 216(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 5, 0(3)
 ; CHECK-NEXT:  .LBB0_7: # %_return_bb
-; CHECK-NEXT:    lfd 31, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 30, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 31, 328(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 30, 320(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 29, 312(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 28, 304(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 27, 296(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 26, 288(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 25, 280(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 29, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 24, 272(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 23, 264(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 22, 256(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 28, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 21, 248(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 20, 240(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 19, 232(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 27, 408(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 18, 224(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 17, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 16, 208(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 26, 400(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 15, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 14, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 25, 392(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 24, 384(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 23, 376(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 22, 368(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 21, 360(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 20, 352(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 19, 344(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 18, 336(1) # 8-byte Folded Reload
-; CHECK-NEXT:    addi 1, 1, 448
+; CHECK-NEXT:    lfd 31, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 30, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 31, 360(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, 352(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, 344(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 28, 336(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 27, 328(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 26, 320(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 25, 312(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 29, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 24, 304(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 23, 296(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, 288(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 28, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, 280(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, 272(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, 264(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 27, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 18, 256(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 17, 248(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 16, 240(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 26, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 15, 232(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 14, 224(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 25, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 24, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 23, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 22, 400(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 21, 392(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 20, 384(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 19, 376(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 18, 368(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 480
 ; CHECK-NEXT:    blr
 entry:
   %_val_l_ = load i32, i32* %.l, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll
index 0d67a7f44f37..423c61d800c0 100644
--- a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll
+++ b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll
@@ -375,9 +375,9 @@ define zeroext i32 @test1(i64 %0, i64* %1) {
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    ld 12, 384(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lwz 4, 396(1) # 4-byte Folded Reload
-; CHECK-NEXT:    addi 4, 4, 1
 ; CHECK-NEXT:    std 3, 0(12)
 ; CHECK-NEXT:    ld 12, 376(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 4, 4, 1
 ; CHECK-NEXT:    std 3, 0(12)
 ; CHECK-NEXT:    ld 12, 368(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    std 3, 0(12)

diff  --git a/llvm/test/CodeGen/PowerPC/sched-addi.ll b/llvm/test/CodeGen/PowerPC/sched-addi.ll
index 9cfe20e233aa..e3591b77fd75 100644
--- a/llvm/test/CodeGen/PowerPC/sched-addi.ll
+++ b/llvm/test/CodeGen/PowerPC/sched-addi.ll
@@ -15,8 +15,8 @@ define dso_local void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_typ
 ; CHECK-P9-NEXT:    ld 5, 0(5)
 ; CHECK-P9-NEXT:    addis 6, 2, scalars@toc@ha
 ; CHECK-P9-NEXT:    addi 6, 6, scalars@toc@l
-; CHECK-P9-NEXT:    addi 6, 6, 16
 ; CHECK-P9-NEXT:    rldicr 5, 5, 0, 58
+; CHECK-P9-NEXT:    addi 6, 6, 16
 ; CHECK-P9-NEXT:    addi 5, 5, -32
 ; CHECK-P9-NEXT:    lxvdsx 0, 0, 6
 ; CHECK-P9-NEXT:    rldicl 5, 5, 59, 5
@@ -35,9 +35,9 @@ define dso_local void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_typ
 ; CHECK-P9-NEXT:    xvmuldp 1, 1, 0
 ; CHECK-P9-NEXT:    xvmuldp 4, 4, 0
 ; CHECK-P9-NEXT:    xvmuldp 3, 3, 0
+; CHECK-P9-NEXT:    xvmuldp 6, 6, 0
 ; CHECK-P9-NEXT:    xvmuldp 5, 5, 0
 ; CHECK-P9-NEXT:    addi 4, 4, 256
-; CHECK-P9-NEXT:    xvmuldp 6, 6, 0
 ; CHECK-P9-NEXT:    stxv 1, 16(3)
 ; CHECK-P9-NEXT:    stxv 2, 0(3)
 ; CHECK-P9-NEXT:    stxv 3, 48(3)

diff  --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
index be4e3908944a..563269595fbf 100644
--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
@@ -22,22 +22,22 @@ define void @print_res() nounwind {
 ; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    li 4, 0
 ; CHECK-NEXT:    addi 3, 3, 1
-; CHECK-NEXT:    li 7, -1
 ; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    li 7, -1
 ; CHECK-NEXT:    mtctr 3
-; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    lbz 5, 0(5)
+; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    bdz .LBB0_6
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi 3, 3, 1
-; CHECK-NEXT:    addi 8, 7, -1
 ; CHECK-NEXT:    xori 6, 5, 84
 ; CHECK-NEXT:    clrldi 5, 7, 32
+; CHECK-NEXT:    addi 3, 3, 1
+; CHECK-NEXT:    addi 8, 7, -1
 ; CHECK-NEXT:    lbz 5, 0(5)
 ; CHECK-NEXT:    bdz .LBB0_5
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    addi 3, 3, 1
 ; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    addi 3, 3, 1
 ; CHECK-NEXT:    srwi 7, 6, 5
 ; CHECK-NEXT:    xori 6, 5, 84
 ; CHECK-NEXT:    clrldi 5, 8, 32
@@ -46,12 +46,12 @@ define void @print_res() nounwind {
 ; CHECK-NEXT:    bdz .LBB0_4
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_3:
-; CHECK-NEXT:    addi 3, 3, 1
 ; CHECK-NEXT:    clrldi 10, 8, 32
-; CHECK-NEXT:    addi 8, 8, -1
 ; CHECK-NEXT:    cntlzw 9, 6
 ; CHECK-NEXT:    xori 6, 5, 84
+; CHECK-NEXT:    addi 8, 8, -1
 ; CHECK-NEXT:    lbz 5, 0(10)
+; CHECK-NEXT:    addi 3, 3, 1
 ; CHECK-NEXT:    add 4, 4, 7
 ; CHECK-NEXT:    srwi 7, 9, 5
 ; CHECK-NEXT:    bdnz .LBB0_3

diff  --git a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
index 45aaef62a912..1fff3e58269c 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
@@ -14,17 +14,17 @@ define void @main() nounwind #0 {
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    bl calloc
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    clrldi 4, 30, 32
 ; CHECK-NEXT:    li 5, 0
 ; CHECK-NEXT:    addi 3, 3, -4
-; CHECK-NEXT:    li 6, 1
-; CHECK-NEXT:    clrldi 4, 30, 32
 ; CHECK-NEXT:    mtctr 4
 ; CHECK-NEXT:    mullw 4, 5, 5
+; CHECK-NEXT:    li 6, 1
 ; CHECK-NEXT:    bdz .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi 5, 6, 1
 ; CHECK-NEXT:    stwu 4, 4(3)
 ; CHECK-NEXT:    mullw 4, 6, 6
+; CHECK-NEXT:    addi 5, 6, 1
 ; CHECK-NEXT:    bdz .LBB0_3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2:

diff  --git a/llvm/test/CodeGen/PowerPC/sms-simple.ll b/llvm/test/CodeGen/PowerPC/sms-simple.ll
index d147079a9fb9..9cac77b5540a 100644
--- a/llvm/test/CodeGen/PowerPC/sms-simple.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-simple.ll
@@ -9,15 +9,15 @@
 define dso_local i32* @foo() local_unnamed_addr {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, x@toc@ha
-; CHECK-NEXT:    addis r6, r2, y@toc@ha
+; CHECK-NEXT:    addis r5, r2, y@toc@ha
 ; CHECK-NEXT:    li r7, 340
-; CHECK-NEXT:    addi r5, r5, x@toc@l
-; CHECK-NEXT:    addi r5, r5, -8
-; CHECK-NEXT:    addi r3, r6, y@toc@l
-; CHECK-NEXT:    lwz r6, y@toc@l(r6)
+; CHECK-NEXT:    addi r3, r5, y@toc@l
+; CHECK-NEXT:    lwz r6, y@toc@l(r5)
+; CHECK-NEXT:    addis r5, r2, x@toc@ha
 ; CHECK-NEXT:    mtctr r7
+; CHECK-NEXT:    addi r5, r5, x@toc@l
 ; CHECK-NEXT:    addi r4, r3, -8
+; CHECK-NEXT:    addi r5, r5, -8
 ; CHECK-NEXT:    lwzu r7, 12(r5)
 ; CHECK-NEXT:    maddld r6, r7, r7, r6
 ; CHECK-NEXT:    lwz r7, 4(r5)

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
index eef02e77c2b1..dcd4dda985e4 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
@@ -50,9 +50,9 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-P9-LE-NEXT:    std r31, -8(r1)
 ; CHECK-P9-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-P9-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-P9-LE-NEXT:    addi r3, r3, 15
 ; CHECK-P9-LE-NEXT:    li r6, -32768
 ; CHECK-P9-LE-NEXT:    mr r31, r1
+; CHECK-P9-LE-NEXT:    addi r3, r3, 15
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r5, r3
@@ -189,9 +189,9 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-P9-LE-NEXT:    std r31, -8(r1)
 ; CHECK-P9-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-P9-LE-NEXT:    rldic r4, r3, 2, 30
-; CHECK-P9-LE-NEXT:    addi r4, r4, 15
 ; CHECK-P9-LE-NEXT:    li r7, -4096
 ; CHECK-P9-LE-NEXT:    mr r31, r1
+; CHECK-P9-LE-NEXT:    addi r4, r4, 15
 ; CHECK-P9-LE-NEXT:    rldicl r4, r4, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r4, r4, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r6, r4
@@ -333,10 +333,10 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-P9-LE-NEXT:    std r31, -8(r1)
 ; CHECK-P9-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-P9-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-P9-LE-NEXT:    addi r3, r3, 15
 ; CHECK-P9-LE-NEXT:    lis r5, -1
-; CHECK-P9-LE-NEXT:    ori r5, r5, 0
 ; CHECK-P9-LE-NEXT:    mr r31, r1
+; CHECK-P9-LE-NEXT:    addi r3, r3, 15
+; CHECK-P9-LE-NEXT:    ori r5, r5, 0
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r6, r3


        


More information about the llvm-commits mailing list