[llvm] 2fea3fe - [MachineScheduler] Update available queue on the first mop of a new cycle

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 9 11:15:08 PDT 2020


Author: David Green
Date: 2020-06-09T19:13:53+01:00
New Revision: 2fea3fe41c5a177d019dd99fb1b43d767eccde24

URL: https://github.com/llvm/llvm-project/commit/2fea3fe41c5a177d019dd99fb1b43d767eccde24
DIFF: https://github.com/llvm/llvm-project/commit/2fea3fe41c5a177d019dd99fb1b43d767eccde24.diff

LOG: [MachineScheduler] Update available queue on the first mop of a new cycle

If a resource can be held for multiple cycles in the schedule model
then an instruction can be placed into the available queue, another
instruction can be scheduled, but the first will not be taken back out if
the two instructions hazard. To fix this make sure that we update the
available queue even on the first MOp of a cycle, pushing available
instructions back into the pending queue if they now conflict.

This happens with some downstream schedules we have around MVE
instruction scheduling where we use ResourceCycles=[2] to show the
instruction executing over two beats. Apparently the test changes here
are OK too.

Differential Revision: https://reviews.llvm.org/D76909

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineScheduler.cpp
    llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
    llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
    llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
    llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
    llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
    llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
    llvm/test/CodeGen/PowerPC/inc-of-add.ll
    llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll
    llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/pr43976.ll
    llvm/test/CodeGen/PowerPC/spe.ll
    llvm/test/CodeGen/PowerPC/sub-of-not.ll
    llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/PowerPC/vec_splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index a68899191374..0f21c97a30f6 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2424,16 +2424,14 @@ SUnit *SchedBoundary::pickOnlyChoice() {
   if (CheckPending)
     releasePending();
 
-  if (CurrMOps > 0) {
-    // Defer any ready instrs that now have a hazard.
-    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
-      if (checkHazard(*I)) {
-        Pending.push(*I);
-        I = Available.remove(I);
-        continue;
-      }
-      ++I;
+  // Defer any ready instrs that now have a hazard.
+  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+    if (checkHazard(*I)) {
+      Pending.push(*I);
+      I = Available.remove(I);
+      continue;
     }
+    ++I;
   }
   for (unsigned i = 0; Available.empty(); ++i) {
 //  FIXME: Re-enable assert once PR20057 is resolved.

diff  --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
index 70038e934c9f..95a419bd7398 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
@@ -79,7 +79,7 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
 
 ; CHECK-LABEL: aesea:
 ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECK-NEXT: aesmc [[VA]], [[VA]]
+; CHECK: aesmc [[VA]], [[VA]]
 ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECK-NEXT: aesmc [[VB]], [[VB]]
 ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
@@ -163,7 +163,7 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
 
 ; CHECK-LABEL: aesda:
 ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECK-NEXT: aesimc [[VA]], [[VA]]
+; CHECK: aesimc [[VA]], [[VA]]
 ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECK-NEXT: aesimc [[VB]], [[VB]]
 ; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}

diff  --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index d216cf59bde2..9af68e7d8012 100644
--- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -2,7 +2,7 @@
 
 define i32 @foo() nounwind {
 entry:
-; CHECK: cntlzw 3, 4
+; CHECK: cntlzw 3, 3
 	%retval = alloca i32, align 4		; <i32*> [#uses=2]
 	%temp = alloca i32, align 4		; <i32*> [#uses=2]
 	%ctz_x = alloca i32, align 4		; <i32*> [#uses=3]

diff  --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 7897d1c6b8a5..028904fc3200 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -9,29 +9,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    stwu 1, -464(1)
 ; CHECK-NEXT:    mfcr 12
 ; CHECK-NEXT:    stw 29, 412(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 30, 416(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    lis 3, .LCPI0_0@ha
+; CHECK-NEXT:    stw 30, 416(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    stw 12, 408(1)
 ; CHECK-NEXT:    stfd 2, 376(1)
-; CHECK-NEXT:    stfd 27, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 1, 384(1)
-; CHECK-NEXT:    stfd 28, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 29, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 30, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 31, 456(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    lwz 4, 380(1)
-; CHECK-NEXT:    lfs 27, .LCPI0_0@l(3)
-; CHECK-NEXT:    lwz 3, 384(1)
+; CHECK-NEXT:    stfd 27, 424(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stw 4, 396(1)
-; CHECK-NEXT:    fcmpu 0, 2, 27
 ; CHECK-NEXT:    lwz 4, 376(1)
+; CHECK-NEXT:    lfs 27, .LCPI0_0@l(3)
+; CHECK-NEXT:    stfd 1, 384(1)
+; CHECK-NEXT:    stw 4, 392(1)
+; CHECK-NEXT:    fcmpu 0, 2, 27
+; CHECK-NEXT:    lwz 4, 388(1)
 ; CHECK-NEXT:    fcmpu 1, 1, 27
+; CHECK-NEXT:    lwz 3, 384(1)
 ; CHECK-NEXT:    crand 20, 6, 0
 ; CHECK-NEXT:    cror 20, 4, 20
-; CHECK-NEXT:    stw 4, 392(1)
-; CHECK-NEXT:    stw 3, 400(1)
-; CHECK-NEXT:    lwz 4, 388(1)
+; CHECK-NEXT:    stfd 28, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 29, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 30, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, 456(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stw 4, 404(1)
+; CHECK-NEXT:    stw 3, 400(1)
 ; CHECK-NEXT:    bc 4, 20, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb5
 ; CHECK-NEXT:    li 3, 0
@@ -41,54 +41,53 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    lfd 0, 400(1)
 ; CHECK-NEXT:    lis 3, 15856
 ; CHECK-NEXT:    stw 3, 336(1)
-; CHECK-NEXT:    lfd 1, 392(1)
 ; CHECK-NEXT:    li 29, 0
 ; CHECK-NEXT:    stfd 0, 304(1)
-; CHECK-NEXT:    stw 29, 340(1)
-; CHECK-NEXT:    stw 29, 332(1)
-; CHECK-NEXT:    stw 29, 328(1)
 ; CHECK-NEXT:    lwz 3, 308(1)
-; CHECK-NEXT:    stfd 1, 296(1)
-; CHECK-NEXT:    lfd 3, 336(1)
-; CHECK-NEXT:    lfd 4, 328(1)
+; CHECK-NEXT:    lfd 1, 392(1)
 ; CHECK-NEXT:    stw 3, 324(1)
 ; CHECK-NEXT:    lwz 3, 304(1)
+; CHECK-NEXT:    stfd 1, 296(1)
 ; CHECK-NEXT:    stw 3, 320(1)
 ; CHECK-NEXT:    lwz 3, 300(1)
-; CHECK-NEXT:    lfd 31, 320(1)
+; CHECK-NEXT:    stw 29, 340(1)
 ; CHECK-NEXT:    stw 3, 316(1)
-; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    lwz 3, 296(1)
+; CHECK-NEXT:    stw 29, 332(1)
 ; CHECK-NEXT:    stw 3, 312(1)
+; CHECK-NEXT:    stw 29, 328(1)
+; CHECK-NEXT:    lfd 31, 320(1)
 ; CHECK-NEXT:    lfd 30, 312(1)
+; CHECK-NEXT:    lfd 3, 336(1)
+; CHECK-NEXT:    fmr 1, 31
+; CHECK-NEXT:    lfd 4, 328(1)
 ; CHECK-NEXT:    fmr 2, 30
 ; CHECK-NEXT:    bl __gcc_qmul
 ; CHECK-NEXT:    lis 3, 16864
 ; CHECK-NEXT:    stfd 1, 280(1)
-; CHECK-NEXT:    stw 3, 368(1)
-; CHECK-NEXT:    stfd 2, 288(1)
-; CHECK-NEXT:    stw 29, 372(1)
-; CHECK-NEXT:    stw 29, 364(1)
-; CHECK-NEXT:    stw 29, 360(1)
 ; CHECK-NEXT:    fmr 29, 1
-; CHECK-NEXT:    lwz 3, 284(1)
+; CHECK-NEXT:    stw 3, 368(1)
 ; CHECK-NEXT:    fmr 28, 2
-; CHECK-NEXT:    lfd 3, 368(1)
-; CHECK-NEXT:    lfd 4, 360(1)
+; CHECK-NEXT:    lwz 3, 284(1)
+; CHECK-NEXT:    stfd 2, 288(1)
 ; CHECK-NEXT:    stw 3, 356(1)
 ; CHECK-NEXT:    lwz 3, 280(1)
+; CHECK-NEXT:    stw 29, 372(1)
 ; CHECK-NEXT:    stw 3, 352(1)
 ; CHECK-NEXT:    lwz 3, 292(1)
-; CHECK-NEXT:    lfd 1, 352(1)
+; CHECK-NEXT:    stw 29, 364(1)
 ; CHECK-NEXT:    stw 3, 348(1)
 ; CHECK-NEXT:    lwz 3, 288(1)
+; CHECK-NEXT:    stw 29, 360(1)
 ; CHECK-NEXT:    stw 3, 344(1)
+; CHECK-NEXT:    lfd 3, 368(1)
+; CHECK-NEXT:    lfd 4, 360(1)
+; CHECK-NEXT:    lfd 1, 352(1)
 ; CHECK-NEXT:    lfd 2, 344(1)
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
 ; CHECK-NEXT:    mtfsb1 31
 ; CHECK-NEXT:    lis 3, .LCPI0_1@ha
-; CHECK-NEXT:    fcmpu 0, 28, 27
 ; CHECK-NEXT:    mtfsb0 30
 ; CHECK-NEXT:    fadd 1, 2, 1
 ; CHECK-NEXT:    mtfsf 1, 0
@@ -102,6 +101,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    lfs 1, .LCPI0_1@l(3)
 ; CHECK-NEXT:    fctiwz 0, 0
 ; CHECK-NEXT:    stfd 0, 152(1)
+; CHECK-NEXT:    fcmpu 0, 28, 27
 ; CHECK-NEXT:    lwz 3, 164(1)
 ; CHECK-NEXT:    fcmpu 1, 29, 1
 ; CHECK-NEXT:    lwz 4, 156(1)
@@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    bl __floatditf
 ; CHECK-NEXT:    lis 3, 17392
 ; CHECK-NEXT:    stfd 1, 208(1)
-; CHECK-NEXT:    stw 3, 240(1)
-; CHECK-NEXT:    stfd 2, 200(1)
-; CHECK-NEXT:    stw 29, 244(1)
-; CHECK-NEXT:    stw 29, 236(1)
-; CHECK-NEXT:    stw 29, 232(1)
 ; CHECK-NEXT:    fmr 29, 1
-; CHECK-NEXT:    lwz 3, 212(1)
+; CHECK-NEXT:    stw 3, 240(1)
 ; CHECK-NEXT:    fmr 28, 2
-; CHECK-NEXT:    lfd 3, 240(1)
-; CHECK-NEXT:    lfd 4, 232(1)
+; CHECK-NEXT:    lwz 3, 212(1)
 ; CHECK-NEXT:    cmpwi 2, 30, 0
+; CHECK-NEXT:    stfd 2, 200(1)
 ; CHECK-NEXT:    stw 3, 228(1)
 ; CHECK-NEXT:    lwz 3, 208(1)
+; CHECK-NEXT:    stw 29, 244(1)
 ; CHECK-NEXT:    stw 3, 224(1)
 ; CHECK-NEXT:    lwz 3, 204(1)
-; CHECK-NEXT:    lfd 1, 224(1)
+; CHECK-NEXT:    stw 29, 236(1)
 ; CHECK-NEXT:    stw 3, 220(1)
 ; CHECK-NEXT:    lwz 3, 200(1)
+; CHECK-NEXT:    stw 29, 232(1)
 ; CHECK-NEXT:    stw 3, 216(1)
+; CHECK-NEXT:    lfd 3, 240(1)
+; CHECK-NEXT:    lfd 4, 232(1)
+; CHECK-NEXT:    lfd 1, 224(1)
 ; CHECK-NEXT:    lfd 2, 216(1)
 ; CHECK-NEXT:    bl __gcc_qadd
 ; CHECK-NEXT:    blt 2, .LBB0_7
@@ -150,9 +150,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    fmr 1, 29
 ; CHECK-NEXT:  .LBB0_9: # %bb1
 ; CHECK-NEXT:    stfd 1, 184(1)
-; CHECK-NEXT:    stfd 2, 192(1)
 ; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    lwz 3, 188(1)
+; CHECK-NEXT:    stfd 2, 192(1)
 ; CHECK-NEXT:    fmr 2, 30
 ; CHECK-NEXT:    stw 3, 260(1)
 ; CHECK-NEXT:    lwz 3, 184(1)
@@ -165,10 +165,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    lfd 4, 248(1)
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    stfd 2, 176(1)
-; CHECK-NEXT:    stfd 1, 168(1)
 ; CHECK-NEXT:    fcmpu 1, 2, 27
 ; CHECK-NEXT:    lwz 3, 180(1)
 ; CHECK-NEXT:    fcmpu 0, 1, 27
+; CHECK-NEXT:    stfd 1, 168(1)
 ; CHECK-NEXT:    crandc 20, 2, 4
 ; CHECK-NEXT:    stw 3, 268(1)
 ; CHECK-NEXT:    lwz 3, 176(1)
@@ -184,27 +184,27 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    cror 20, 1, 3
 ; CHECK-NEXT:    bc 12, 20, .LBB0_14
 ; CHECK-NEXT:  # %bb.11: # %bb2
-; CHECK-NEXT:    fneg 28, 31
-; CHECK-NEXT:    stfd 28, 48(1)
+; CHECK-NEXT:    fneg 29, 31
+; CHECK-NEXT:    stfd 29, 48(1)
 ; CHECK-NEXT:    lis 3, 16864
 ; CHECK-NEXT:    stw 3, 80(1)
-; CHECK-NEXT:    fneg 29, 30
+; CHECK-NEXT:    fneg 28, 30
 ; CHECK-NEXT:    lwz 3, 52(1)
-; CHECK-NEXT:    stfd 29, 40(1)
 ; CHECK-NEXT:    li 29, 0
-; CHECK-NEXT:    stw 29, 84(1)
-; CHECK-NEXT:    stw 29, 76(1)
-; CHECK-NEXT:    stw 29, 72(1)
+; CHECK-NEXT:    stfd 28, 40(1)
 ; CHECK-NEXT:    stw 3, 68(1)
-; CHECK-NEXT:    lfd 3, 80(1)
-; CHECK-NEXT:    lfd 4, 72(1)
 ; CHECK-NEXT:    lwz 3, 48(1)
+; CHECK-NEXT:    stw 29, 84(1)
 ; CHECK-NEXT:    stw 3, 64(1)
 ; CHECK-NEXT:    lwz 3, 44(1)
-; CHECK-NEXT:    lfd 1, 64(1)
+; CHECK-NEXT:    stw 29, 76(1)
 ; CHECK-NEXT:    stw 3, 60(1)
 ; CHECK-NEXT:    lwz 3, 40(1)
+; CHECK-NEXT:    stw 29, 72(1)
 ; CHECK-NEXT:    stw 3, 56(1)
+; CHECK-NEXT:    lfd 3, 80(1)
+; CHECK-NEXT:    lfd 4, 72(1)
+; CHECK-NEXT:    lfd 1, 64(1)
 ; CHECK-NEXT:    lfd 2, 56(1)
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
@@ -220,12 +220,12 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    lfs 0, .LCPI0_2@l(3)
 ; CHECK-NEXT:    lis 3, .LCPI0_3@ha
 ; CHECK-NEXT:    mtfsb0 30
-; CHECK-NEXT:    fadd 2, 29, 28
+; CHECK-NEXT:    fadd 2, 28, 29
 ; CHECK-NEXT:    mtfsf 1, 1
 ; CHECK-NEXT:    lfs 1, .LCPI0_3@l(3)
-; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    fctiwz 2, 2
 ; CHECK-NEXT:    stfd 2, 24(1)
+; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    lwz 3, 36(1)
 ; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 4, 28(1)
@@ -244,22 +244,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    stfd 31, 112(1)
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    stw 3, 148(1)
+; CHECK-NEXT:    lis 4, 16864
 ; CHECK-NEXT:    stw 3, 140(1)
 ; CHECK-NEXT:    stw 3, 136(1)
-; CHECK-NEXT:    stfd 30, 104(1)
-; CHECK-NEXT:    lis 4, 16864
 ; CHECK-NEXT:    lwz 3, 116(1)
-; CHECK-NEXT:    stw 4, 144(1)
-; CHECK-NEXT:    lfd 4, 136(1)
+; CHECK-NEXT:    stfd 30, 104(1)
 ; CHECK-NEXT:    stw 3, 132(1)
-; CHECK-NEXT:    lfd 3, 144(1)
 ; CHECK-NEXT:    lwz 3, 112(1)
+; CHECK-NEXT:    stw 4, 144(1)
 ; CHECK-NEXT:    stw 3, 128(1)
 ; CHECK-NEXT:    lwz 3, 108(1)
-; CHECK-NEXT:    lfd 1, 128(1)
+; CHECK-NEXT:    lfd 3, 144(1)
 ; CHECK-NEXT:    stw 3, 124(1)
 ; CHECK-NEXT:    lwz 3, 104(1)
+; CHECK-NEXT:    lfd 4, 136(1)
 ; CHECK-NEXT:    stw 3, 120(1)
+; CHECK-NEXT:    lfd 1, 128(1)
 ; CHECK-NEXT:    lfd 2, 120(1)
 ; CHECK-NEXT:    bl __gcc_qsub
 ; CHECK-NEXT:    mffs 0
@@ -278,9 +278,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    fadd 2, 30, 31
 ; CHECK-NEXT:    mtfsf 1, 1
 ; CHECK-NEXT:    lfs 1, .LCPI0_1 at l(3)
-; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    fctiwz 2, 2
 ; CHECK-NEXT:    stfd 2, 88(1)
+; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    lwz 3, 100(1)
 ; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 4, 92(1)
@@ -300,8 +300,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-NEXT:    lfd 28, 432(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lwz 12, 408(1)
 ; CHECK-NEXT:    lfd 27, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lwz 30, 416(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    mtcrf 32, 12 # cr2
+; CHECK-NEXT:    lwz 30, 416(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 29, 412(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 0, 468(1)
 ; CHECK-NEXT:    addi 1, 1, 464

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index d155a7881225..52070aa9063d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -704,8 +704,8 @@ declare void @test_vararg(i32, ...)
 ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
 ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
 ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
-; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
 ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
 ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
@@ -773,8 +773,8 @@ entry:
 ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
 ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
 ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
-; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
 ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
 ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)
@@ -844,8 +844,8 @@ entry:
 ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1)
 ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got)
 ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]])
-; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8)
+; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1)
 ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4)
 ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]])
 ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8)

diff  --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
index 9f521788a3fc..c276d4ccc395 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
@@ -68,15 +68,15 @@
 ; 32BIT-DAG:     STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4)
 ; 32BIT-DAG:     STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4)
 ; 32BIT-DAG:     STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4)
-; 32BIT-DAG:     STW killed renamable $r5, 0, %stack.1.arg2 :: (store 4 into %ir.arg2)
-; 32BIT-DAG:     renamable $r5 = ADDI %fixed-stack.0, 4
-; 32BIT-DAG:     STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.1)
-; 32BIT-DAG:     renamable $r4 = ADDI %fixed-stack.0, 0
-; 32BIT-DAG:     STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.0)
-; 32BIT-DAG:     STW renamable $r5, 0, %stack.0.arg1 :: (store 4 into %ir.arg1)
-; 32BIT-DAG:     renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2)
-; 32BIT-DAG:     renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4)
-; 32BIT-DAG:     renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3
+; 32BIT-DAG:     STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2)
+; 32BIT-DAG:     renamable $r4 = ADDI %fixed-stack.0, 4
+; 32BIT-DAG:     STW killed renamable $r11, 0, %stack.1.arg2 :: (store 4 into %ir.1)
+; 32BIT-DAG:     renamable $r11 = ADDI %fixed-stack.0, 0
+; 32BIT-DAG:     STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0)
+; 32BIT-DAG:     STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1)
+; 32BIT-DAG:     renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2)
+; 32BIT-DAG:     renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4)
+; 32BIT-DAG:     renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3
 ; 32BIT-DAG:     renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4
 ; 32BIT-DAG:     BLR implicit $lr, implicit $rm, implicit $r3
 

diff  --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 54ceccd9c59a..fa57f50cb43d 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -28,9 +28,9 @@ entry:
 ; PPC32-DAG: stfd 2, 16(1)
 ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
 ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
+; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0
 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
 ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
-; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0
 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]]
 ; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]]
 ; PPC32: blr
@@ -68,9 +68,9 @@ entry:
 ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
 ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
-; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
 ; PPC32-NOT: BARRIER
 ; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768
+; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
 ; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768
 ; PPC32: blr
 	%0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x

diff  --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
index fa03379a3c30..90004143326f 100644
--- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll
+++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
@@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    stwu 1, -64(1)
 ; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 4, 119(1)
-; PPC32-NEXT:    lbz 11, 115(1)
-; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 4, 4, 6
 ; PPC32-NEXT:    lbz 21, 123(1)
-; PPC32-NEXT:    lbz 6, 131(1)
-; PPC32-NEXT:    add 5, 11, 5
-; PPC32-NEXT:    lbz 11, 127(1)
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    add 7, 21, 7
+; PPC32-NEXT:    lbz 23, 115(1)
+; PPC32-NEXT:    lbz 22, 119(1)
 ; PPC32-NEXT:    lbz 21, 135(1)
-; PPC32-NEXT:    lbz 24, 83(1)
-; PPC32-NEXT:    lbz 23, 79(1)
-; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    add 5, 23, 5
+; PPC32-NEXT:    lbz 23, 127(1)
+; PPC32-NEXT:    add 6, 22, 6
+; PPC32-NEXT:    lbz 22, 131(1)
 ; PPC32-NEXT:    add 10, 21, 10
-; PPC32-NEXT:    lbz 21, 147(1)
-; PPC32-NEXT:    lbz 9, 143(1)
-; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 8, 11, 8
-; PPC32-NEXT:    lbz 22, 75(1)
-; PPC32-NEXT:    lbz 11, 139(1)
-; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 24, 21, 24
-; PPC32-NEXT:    lbz 27, 95(1)
-; PPC32-NEXT:    lbz 21, 159(1)
 ; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 9, 9, 23
-; PPC32-NEXT:    lbz 26, 91(1)
-; PPC32-NEXT:    lbz 23, 155(1)
+; PPC32-NEXT:    add 8, 23, 8
+; PPC32-NEXT:    lbz 26, 83(1)
+; PPC32-NEXT:    add 9, 22, 9
+; PPC32-NEXT:    lbz 21, 147(1)
+; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 11, 11, 22
-; PPC32-NEXT:    lbz 25, 87(1)
-; PPC32-NEXT:    lbz 22, 151(1)
-; PPC32-NEXT:    lbz 12, 111(1)
-; PPC32-NEXT:    add 27, 21, 27
-; PPC32-NEXT:    lbz 21, 175(1)
-; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 0, 107(1)
+; PPC32-NEXT:    add 26, 21, 26
+; PPC32-NEXT:    lbz 25, 79(1)
+; PPC32-NEXT:    lbz 24, 75(1)
+; PPC32-NEXT:    lbz 23, 139(1)
+; PPC32-NEXT:    lbz 22, 143(1)
 ; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 26, 23, 26
-; PPC32-NEXT:    lbz 30, 171(1)
-; PPC32-NEXT:    lbz 29, 103(1)
-; PPC32-NEXT:    lbz 23, 167(1)
-; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 24, 23, 24
+; PPC32-NEXT:    lbz 29, 95(1)
 ; PPC32-NEXT:    add 25, 22, 25
-; PPC32-NEXT:    lbz 28, 99(1)
-; PPC32-NEXT:    lbz 22, 163(1)
-; PPC32-NEXT:    add 12, 21, 12
-; PPC32-NEXT:    add 30, 30, 0
-; PPC32-NEXT:    addi 12, 12, 1
-; PPC32-NEXT:    add 29, 23, 29
-; PPC32-NEXT:    stb 12, 15(3)
-; PPC32-NEXT:    addi 12, 30, 1
+; PPC32-NEXT:    lbz 21, 159(1)
+; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 29, 21, 29
+; PPC32-NEXT:    lbz 28, 91(1)
+; PPC32-NEXT:    lbz 27, 87(1)
+; PPC32-NEXT:    lbz 23, 151(1)
+; PPC32-NEXT:    lbz 22, 155(1)
+; PPC32-NEXT:    lbz 4, 111(1)
+; PPC32-NEXT:    add 27, 23, 27
+; PPC32-NEXT:    lbz 21, 175(1)
 ; PPC32-NEXT:    add 28, 22, 28
-; PPC32-NEXT:    stb 12, 14(3)
-; PPC32-NEXT:    addi 12, 29, 1
-; PPC32-NEXT:    stb 12, 13(3)
-; PPC32-NEXT:    addi 12, 28, 1
-; PPC32-NEXT:    stb 12, 12(3)
-; PPC32-NEXT:    addi 12, 27, 1
-; PPC32-NEXT:    stb 12, 11(3)
-; PPC32-NEXT:    addi 12, 26, 1
-; PPC32-NEXT:    addi 9, 9, 1
-; PPC32-NEXT:    addi 6, 6, 1
-; PPC32-NEXT:    stb 12, 10(3)
-; PPC32-NEXT:    addi 12, 25, 1
-; PPC32-NEXT:    stb 9, 7(3)
-; PPC32-NEXT:    addi 9, 11, 1
-; PPC32-NEXT:    stb 6, 4(3)
-; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    lbz 11, 107(1)
+; PPC32-NEXT:    lbz 12, 171(1)
+; PPC32-NEXT:    add 4, 21, 4
+; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    addi 4, 4, 1
-; PPC32-NEXT:    stb 12, 9(3)
-; PPC32-NEXT:    addi 12, 24, 1
-; PPC32-NEXT:    stb 9, 6(3)
-; PPC32-NEXT:    addi 9, 10, 1
-; PPC32-NEXT:    stb 6, 3(3)
-; PPC32-NEXT:    addi 6, 7, 1
+; PPC32-NEXT:    lbz 0, 103(1)
+; PPC32-NEXT:    add 11, 12, 11
+; PPC32-NEXT:    lbz 30, 99(1)
+; PPC32-NEXT:    lbz 23, 163(1)
+; PPC32-NEXT:    lbz 22, 167(1)
+; PPC32-NEXT:    add 30, 23, 30
+; PPC32-NEXT:    stb 4, 15(3)
+; PPC32-NEXT:    add 23, 22, 0
+; PPC32-NEXT:    addi 4, 11, 1
+; PPC32-NEXT:    stb 4, 14(3)
+; PPC32-NEXT:    addi 4, 23, 1
+; PPC32-NEXT:    stb 4, 13(3)
+; PPC32-NEXT:    addi 4, 30, 1
+; PPC32-NEXT:    stb 4, 12(3)
+; PPC32-NEXT:    addi 4, 29, 1
+; PPC32-NEXT:    stb 4, 11(3)
+; PPC32-NEXT:    addi 4, 28, 1
+; PPC32-NEXT:    stb 4, 10(3)
+; PPC32-NEXT:    addi 4, 27, 1
+; PPC32-NEXT:    stb 4, 9(3)
+; PPC32-NEXT:    addi 4, 26, 1
+; PPC32-NEXT:    stb 4, 8(3)
+; PPC32-NEXT:    addi 4, 25, 1
+; PPC32-NEXT:    stb 4, 7(3)
+; PPC32-NEXT:    addi 4, 24, 1
+; PPC32-NEXT:    stb 4, 6(3)
+; PPC32-NEXT:    addi 4, 10, 1
+; PPC32-NEXT:    stb 4, 5(3)
+; PPC32-NEXT:    addi 4, 9, 1
+; PPC32-NEXT:    stb 4, 4(3)
+; PPC32-NEXT:    addi 4, 8, 1
+; PPC32-NEXT:    stb 4, 3(3)
+; PPC32-NEXT:    addi 4, 7, 1
+; PPC32-NEXT:    stb 4, 2(3)
+; PPC32-NEXT:    addi 4, 6, 1
 ; PPC32-NEXT:    stb 4, 1(3)
 ; PPC32-NEXT:    addi 4, 5, 1
-; PPC32-NEXT:    stb 12, 8(3)
-; PPC32-NEXT:    stb 9, 5(3)
-; PPC32-NEXT:    stb 6, 2(3)
 ; PPC32-NEXT:    stb 4, 0(3)
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
@@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC64BE:       # %bb.0:
 ; PPC64BE-NEXT:    std 21, -88(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    lbz 21, 207(1)
-; PPC64BE-NEXT:    lbz 11, 199(1)
-; PPC64BE-NEXT:    lbz 12, 191(1)
-; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 22, -80(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 0, 183(1)
+; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    lbz 22, 199(1)
+; PPC64BE-NEXT:    lbz 23, 191(1)
 ; PPC64BE-NEXT:    add 6, 21, 6
 ; PPC64BE-NEXT:    lbz 21, 231(1)
-; PPC64BE-NEXT:    add 5, 11, 5
-; PPC64BE-NEXT:    lbz 11, 223(1)
-; PPC64BE-NEXT:    add 4, 12, 4
-; PPC64BE-NEXT:    lbz 12, 215(1)
-; PPC64BE-NEXT:    lbz 23, 127(1)
+; PPC64BE-NEXT:    add 5, 22, 5
+; PPC64BE-NEXT:    lbz 22, 223(1)
+; PPC64BE-NEXT:    add 4, 23, 4
+; PPC64BE-NEXT:    lbz 23, 215(1)
 ; PPC64BE-NEXT:    add 9, 21, 9
+; PPC64BE-NEXT:    lbz 25, 127(1)
+; PPC64BE-NEXT:    add 8, 22, 8
 ; PPC64BE-NEXT:    lbz 21, 255(1)
-; PPC64BE-NEXT:    lbz 22, 119(1)
-; PPC64BE-NEXT:    add 8, 11, 8
-; PPC64BE-NEXT:    lbz 11, 247(1)
-; PPC64BE-NEXT:    add 7, 12, 7
-; PPC64BE-NEXT:    lbz 12, 239(1)
-; PPC64BE-NEXT:    lbz 26, 151(1)
-; PPC64BE-NEXT:    add 23, 21, 23
-; PPC64BE-NEXT:    lbz 21, 279(1)
-; PPC64BE-NEXT:    lbz 25, 143(1)
-; PPC64BE-NEXT:    add 11, 11, 22
-; PPC64BE-NEXT:    lbz 22, 271(1)
-; PPC64BE-NEXT:    lbz 24, 135(1)
-; PPC64BE-NEXT:    add 10, 12, 10
-; PPC64BE-NEXT:    lbz 12, 263(1)
-; PPC64BE-NEXT:    lbz 30, 175(1)
-; PPC64BE-NEXT:    lbz 29, 303(1)
-; PPC64BE-NEXT:    add 26, 21, 26
-; PPC64BE-NEXT:    lbz 21, 311(1)
-; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 25, 22, 25
-; PPC64BE-NEXT:    lbz 28, 167(1)
-; PPC64BE-NEXT:    lbz 22, 295(1)
-; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 12, 12, 24
-; PPC64BE-NEXT:    lbz 27, 159(1)
-; PPC64BE-NEXT:    lbz 24, 287(1)
-; PPC64BE-NEXT:    add 30, 29, 30
-; PPC64BE-NEXT:    add 29, 21, 0
-; PPC64BE-NEXT:    addi 0, 29, 1
-; PPC64BE-NEXT:    add 28, 22, 28
-; PPC64BE-NEXT:    stb 0, 15(3)
-; PPC64BE-NEXT:    addi 0, 30, 1
-; PPC64BE-NEXT:    add 27, 24, 27
-; PPC64BE-NEXT:    stb 0, 14(3)
-; PPC64BE-NEXT:    addi 0, 28, 1
-; PPC64BE-NEXT:    stb 0, 13(3)
-; PPC64BE-NEXT:    addi 0, 27, 1
-; PPC64BE-NEXT:    stb 0, 12(3)
-; PPC64BE-NEXT:    addi 0, 26, 1
-; PPC64BE-NEXT:    addi 12, 12, 1
-; PPC64BE-NEXT:    stb 0, 11(3)
-; PPC64BE-NEXT:    addi 0, 25, 1
-; PPC64BE-NEXT:    stb 12, 9(3)
-; PPC64BE-NEXT:    addi 12, 23, 1
-; PPC64BE-NEXT:    addi 11, 11, 1
-; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    add 7, 23, 7
+; PPC64BE-NEXT:    lbz 24, 119(1)
 ; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    lbz 22, 247(1)
+; PPC64BE-NEXT:    add 25, 21, 25
+; PPC64BE-NEXT:    lbz 23, 239(1)
 ; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    lbz 28, 151(1)
+; PPC64BE-NEXT:    add 24, 22, 24
+; PPC64BE-NEXT:    lbz 21, 279(1)
+; PPC64BE-NEXT:    add 10, 23, 10
+; PPC64BE-NEXT:    lbz 27, 143(1)
+; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    lbz 22, 271(1)
+; PPC64BE-NEXT:    add 28, 21, 28
+; PPC64BE-NEXT:    lbz 26, 135(1)
 ; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    lbz 23, 263(1)
+; PPC64BE-NEXT:    add 27, 22, 27
+; PPC64BE-NEXT:    lbz 11, 183(1)
 ; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    lbz 21, 311(1)
+; PPC64BE-NEXT:    add 26, 23, 26
+; PPC64BE-NEXT:    lbz 12, 175(1)
 ; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    lbz 0, 303(1)
+; PPC64BE-NEXT:    add 11, 21, 11
+; PPC64BE-NEXT:    lbz 30, 167(1)
+; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    lbz 22, 295(1)
+; PPC64BE-NEXT:    add 12, 0, 12
+; PPC64BE-NEXT:    lbz 29, 159(1)
 ; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    stb 0, 10(3)
-; PPC64BE-NEXT:    stb 12, 8(3)
+; PPC64BE-NEXT:    lbz 23, 287(1)
+; PPC64BE-NEXT:    add 30, 22, 30
+; PPC64BE-NEXT:    stb 11, 15(3)
+; PPC64BE-NEXT:    addi 11, 12, 1
+; PPC64BE-NEXT:    add 29, 23, 29
+; PPC64BE-NEXT:    stb 11, 14(3)
+; PPC64BE-NEXT:    addi 11, 30, 1
+; PPC64BE-NEXT:    stb 11, 13(3)
+; PPC64BE-NEXT:    addi 11, 29, 1
+; PPC64BE-NEXT:    stb 11, 12(3)
+; PPC64BE-NEXT:    addi 11, 28, 1
+; PPC64BE-NEXT:    stb 11, 11(3)
+; PPC64BE-NEXT:    addi 11, 27, 1
+; PPC64BE-NEXT:    stb 11, 10(3)
+; PPC64BE-NEXT:    addi 11, 26, 1
+; PPC64BE-NEXT:    stb 11, 9(3)
+; PPC64BE-NEXT:    addi 11, 25, 1
+; PPC64BE-NEXT:    stb 11, 8(3)
+; PPC64BE-NEXT:    addi 11, 24, 1
 ; PPC64BE-NEXT:    stb 11, 7(3)
 ; PPC64BE-NEXT:    stb 10, 6(3)
 ; PPC64BE-NEXT:    stb 9, 5(3)
@@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; PPC32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lhz 11, 50(1)
-; PPC32-NEXT:    lhz 12, 46(1)
-; PPC32-NEXT:    lhz 0, 42(1)
-; PPC32-NEXT:    lhz 30, 70(1)
-; PPC32-NEXT:    lhz 29, 66(1)
-; PPC32-NEXT:    lhz 28, 62(1)
-; PPC32-NEXT:    lhz 27, 58(1)
+; PPC32-NEXT:    lhz 11, 70(1)
+; PPC32-NEXT:    lhz 12, 66(1)
+; PPC32-NEXT:    lhz 0, 62(1)
+; PPC32-NEXT:    add 10, 11, 10
+; PPC32-NEXT:    lhz 30, 58(1)
+; PPC32-NEXT:    add 9, 12, 9
+; PPC32-NEXT:    lhz 29, 50(1)
+; PPC32-NEXT:    add 8, 0, 8
+; PPC32-NEXT:    lhz 28, 42(1)
+; PPC32-NEXT:    add 7, 30, 7
+; PPC32-NEXT:    lhz 27, 46(1)
+; PPC32-NEXT:    add 5, 29, 5
 ; PPC32-NEXT:    lhz 26, 54(1)
-; PPC32-NEXT:    add 3, 0, 3
-; PPC32-NEXT:    add 4, 12, 4
-; PPC32-NEXT:    add 5, 11, 5
-; PPC32-NEXT:    add 6, 26, 6
-; PPC32-NEXT:    add 7, 27, 7
-; PPC32-NEXT:    add 8, 28, 8
-; PPC32-NEXT:    add 9, 29, 9
-; PPC32-NEXT:    add 10, 30, 10
+; PPC32-NEXT:    add 3, 28, 3
+; PPC32-NEXT:    add 4, 27, 4
 ; PPC32-NEXT:    addi 3, 3, 1
+; PPC32-NEXT:    add 6, 26, 6
 ; PPC32-NEXT:    addi 4, 4, 1
 ; PPC32-NEXT:    addi 5, 5, 1
 ; PPC32-NEXT:    addi 6, 6, 1
@@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 11, 142(1)
-; PPC64BE-NEXT:    lhz 12, 134(1)
-; PPC64BE-NEXT:    lhz 0, 126(1)
-; PPC64BE-NEXT:    lhz 30, 118(1)
-; PPC64BE-NEXT:    lhz 29, 182(1)
-; PPC64BE-NEXT:    lhz 28, 174(1)
-; PPC64BE-NEXT:    lhz 27, 166(1)
-; PPC64BE-NEXT:    lhz 26, 158(1)
+; PPC64BE-NEXT:    lhz 11, 118(1)
+; PPC64BE-NEXT:    lhz 12, 182(1)
+; PPC64BE-NEXT:    lhz 0, 174(1)
+; PPC64BE-NEXT:    lhz 30, 166(1)
+; PPC64BE-NEXT:    add 11, 12, 11
+; PPC64BE-NEXT:    lhz 29, 158(1)
+; PPC64BE-NEXT:    add 10, 0, 10
+; PPC64BE-NEXT:    lhz 28, 142(1)
+; PPC64BE-NEXT:    add 9, 30, 9
+; PPC64BE-NEXT:    lhz 27, 126(1)
+; PPC64BE-NEXT:    add 8, 29, 8
+; PPC64BE-NEXT:    lhz 26, 134(1)
+; PPC64BE-NEXT:    add 6, 28, 6
 ; PPC64BE-NEXT:    lhz 25, 150(1)
-; PPC64BE-NEXT:    add 4, 0, 4
-; PPC64BE-NEXT:    add 5, 12, 5
-; PPC64BE-NEXT:    add 6, 11, 6
+; PPC64BE-NEXT:    add 4, 27, 4
+; PPC64BE-NEXT:    add 5, 26, 5
+; PPC64BE-NEXT:    addi 11, 11, 1
 ; PPC64BE-NEXT:    add 7, 25, 7
-; PPC64BE-NEXT:    add 8, 26, 8
-; PPC64BE-NEXT:    add 9, 27, 9
-; PPC64BE-NEXT:    add 10, 28, 10
-; PPC64BE-NEXT:    add 11, 29, 30
-; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    addi 5, 5, 1
-; PPC64BE-NEXT:    addi 6, 6, 1
-; PPC64BE-NEXT:    addi 7, 7, 1
-; PPC64BE-NEXT:    addi 8, 8, 1
-; PPC64BE-NEXT:    addi 9, 9, 1
 ; PPC64BE-NEXT:    addi 10, 10, 1
-; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    sth 11, 14(3)
 ; PPC64BE-NEXT:    sth 10, 12(3)
 ; PPC64BE-NEXT:    sth 9, 10(3)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll
index 42cbb30318bc..5fae34f212cc 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll
@@ -17,9 +17,9 @@ entry:
 ; argument put on stack.
 ; CHECK-NOT: mr 8, 4
 ; CHECK: stw 6, 16(1)
+; CHECK: stw 7, 20(1)
 ; CHECK: stw 5, 12(1)
 ; CHECK: stw 4, 8(1)
-; CHECK: stw 7, 20(1)
 
 declare i32 @printf(i8* nocapture readonly, ...)
 

diff  --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index c9d9cf870e49..b87f1a682e25 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -1442,19 +1442,19 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r
 ; PC64-NEXT:    mr 29, 3
 ; PC64-NEXT:    li 3, 0
 ; PC64-NEXT:    stfd 31, 168(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 160(1) # 8-byte Folded Spill
 ; PC64-NEXT:    std 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 28, 144(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 152(1) # 8-byte Folded Spill
 ; PC64-NEXT:    mr 30, 4
 ; PC64-NEXT:    lfs 31, 0(29)
 ; PC64-NEXT:    std 3, 8(4)
 ; PC64-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64-NEXT:    stfd 30, 160(1) # 8-byte Folded Spill
 ; PC64-NEXT:    lfs 30, .LCPI32_0@toc@l(3)
 ; PC64-NEXT:    fmr 1, 31
 ; PC64-NEXT:    fmr 3, 31
+; PC64-NEXT:    stfd 28, 144(1) # 8-byte Folded Spill
 ; PC64-NEXT:    fmr 2, 30
 ; PC64-NEXT:    fmr 4, 30
+; PC64-NEXT:    stfd 29, 152(1) # 8-byte Folded Spill
 ; PC64-NEXT:    stfd 31, 0(4)
 ; PC64-NEXT:    bl __gcc_qadd
 ; PC64-NEXT:    nop
@@ -1475,14 +1475,14 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    frsp 0, 1
 ; PC64-NEXT:    stfs 0, 0(29)
-; PC64-NEXT:    lfd 31, 168(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 160(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 152(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 28, 144(1) # 8-byte Folded Reload
 ; PC64-NEXT:    ld 29, 120(1) # 8-byte Folded Reload
 ; PC64-NEXT:    stfd 1, -16(30)
 ; PC64-NEXT:    stfd 2, -8(30)
 ; PC64-NEXT:    ld 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 31, 168(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 160(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 152(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 28, 144(1) # 8-byte Folded Reload
 ; PC64-NEXT:    addi 1, 1, 176
 ; PC64-NEXT:    ld 0, 16(1)
 ; PC64-NEXT:    mtlr 0

diff  --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll
index 91722283f4ae..9dc1a52c567f 100644
--- a/llvm/test/CodeGen/PowerPC/pr43976.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43976.ll
@@ -10,11 +10,11 @@ define dso_local signext i32 @b() local_unnamed_addr #0 {
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -144(r1)
 ; CHECK-NEXT:    addis r3, r2, a@toc@ha
-; CHECK-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    lfd f0, a@toc@l(r3)
-; CHECK-NEXT:    lfs f1, .LCPI0_0@toc@l(r4)
 ; CHECK-NEXT:    li r4, 1
+; CHECK-NEXT:    lfd f0, a@toc@l(r3)
+; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    sldi r4, r4, 63
+; CHECK-NEXT:    lfs f1, .LCPI0_0@toc@l(r3)
 ; CHECK-NEXT:    fsub f2, f0, f1
 ; CHECK-NEXT:    fctidz f2, f2
 ; CHECK-NEXT:    stfd f2, 128(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index d2400be43cb4..1c4c7a339817 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1297,6 +1297,8 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32*
 ; CHECK-NEXT:    evlddx 31, 1, 5 # 8-byte Folded Reload
 ; CHECK-NEXT:    li 5, 256
 ; CHECK-NEXT:    evlddx 30, 1, 5 # 8-byte Folded Reload
+; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
 ; CHECK-NEXT:    evldd 29, 248(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    evldd 28, 240(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    evldd 27, 232(1) # 8-byte Folded Reload
@@ -1313,8 +1315,6 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32*
 ; CHECK-NEXT:    evldd 16, 144(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    evldd 15, 136(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    evldd 14, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
 ; CHECK-NEXT:    lwz 31, 348(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 30, 344(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 29, 340(1) # 4-byte Folded Reload
@@ -1392,8 +1392,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
 ; CHECK-NEXT:    # implicit-def: $r5
 ; CHECK-NEXT:  .LBB57_4: # %for.cond.cleanup
 ; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mr 3, 5
+; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
 ; CHECK-NEXT:    lwz 0, 52(1)

diff  --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll
index db92a3eb1bee..d2b55aaf7ac8 100644
--- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll
+++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll
@@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    stwu 1, -64(1)
 ; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 4, 119(1)
-; PPC32-NEXT:    lbz 11, 115(1)
-; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 4, 4, 6
 ; PPC32-NEXT:    lbz 21, 123(1)
-; PPC32-NEXT:    lbz 6, 131(1)
-; PPC32-NEXT:    add 5, 11, 5
-; PPC32-NEXT:    lbz 11, 127(1)
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    add 7, 21, 7
+; PPC32-NEXT:    lbz 23, 115(1)
+; PPC32-NEXT:    lbz 22, 119(1)
 ; PPC32-NEXT:    lbz 21, 135(1)
-; PPC32-NEXT:    lbz 24, 83(1)
-; PPC32-NEXT:    lbz 23, 79(1)
-; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    add 5, 23, 5
+; PPC32-NEXT:    lbz 23, 127(1)
+; PPC32-NEXT:    add 6, 22, 6
+; PPC32-NEXT:    lbz 22, 131(1)
 ; PPC32-NEXT:    add 10, 21, 10
-; PPC32-NEXT:    lbz 21, 147(1)
-; PPC32-NEXT:    lbz 9, 143(1)
-; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 8, 11, 8
-; PPC32-NEXT:    lbz 22, 75(1)
-; PPC32-NEXT:    lbz 11, 139(1)
-; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 24, 21, 24
-; PPC32-NEXT:    lbz 27, 95(1)
-; PPC32-NEXT:    lbz 21, 159(1)
 ; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 9, 9, 23
-; PPC32-NEXT:    lbz 26, 91(1)
-; PPC32-NEXT:    lbz 23, 155(1)
+; PPC32-NEXT:    add 8, 23, 8
+; PPC32-NEXT:    lbz 26, 83(1)
+; PPC32-NEXT:    add 9, 22, 9
+; PPC32-NEXT:    lbz 21, 147(1)
+; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 11, 11, 22
-; PPC32-NEXT:    lbz 25, 87(1)
-; PPC32-NEXT:    lbz 22, 151(1)
-; PPC32-NEXT:    lbz 12, 111(1)
-; PPC32-NEXT:    add 27, 21, 27
-; PPC32-NEXT:    lbz 21, 175(1)
-; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 0, 107(1)
+; PPC32-NEXT:    add 26, 21, 26
+; PPC32-NEXT:    lbz 25, 79(1)
+; PPC32-NEXT:    lbz 24, 75(1)
+; PPC32-NEXT:    lbz 23, 139(1)
+; PPC32-NEXT:    lbz 22, 143(1)
 ; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 26, 23, 26
-; PPC32-NEXT:    lbz 30, 171(1)
-; PPC32-NEXT:    lbz 29, 103(1)
-; PPC32-NEXT:    lbz 23, 167(1)
-; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 24, 23, 24
+; PPC32-NEXT:    lbz 29, 95(1)
 ; PPC32-NEXT:    add 25, 22, 25
-; PPC32-NEXT:    lbz 28, 99(1)
-; PPC32-NEXT:    lbz 22, 163(1)
-; PPC32-NEXT:    add 12, 21, 12
-; PPC32-NEXT:    add 30, 30, 0
-; PPC32-NEXT:    addi 12, 12, 1
-; PPC32-NEXT:    add 29, 23, 29
-; PPC32-NEXT:    stb 12, 15(3)
-; PPC32-NEXT:    addi 12, 30, 1
+; PPC32-NEXT:    lbz 21, 159(1)
+; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 29, 21, 29
+; PPC32-NEXT:    lbz 28, 91(1)
+; PPC32-NEXT:    lbz 27, 87(1)
+; PPC32-NEXT:    lbz 23, 151(1)
+; PPC32-NEXT:    lbz 22, 155(1)
+; PPC32-NEXT:    lbz 4, 111(1)
+; PPC32-NEXT:    add 27, 23, 27
+; PPC32-NEXT:    lbz 21, 175(1)
 ; PPC32-NEXT:    add 28, 22, 28
-; PPC32-NEXT:    stb 12, 14(3)
-; PPC32-NEXT:    addi 12, 29, 1
-; PPC32-NEXT:    stb 12, 13(3)
-; PPC32-NEXT:    addi 12, 28, 1
-; PPC32-NEXT:    stb 12, 12(3)
-; PPC32-NEXT:    addi 12, 27, 1
-; PPC32-NEXT:    stb 12, 11(3)
-; PPC32-NEXT:    addi 12, 26, 1
-; PPC32-NEXT:    addi 9, 9, 1
-; PPC32-NEXT:    addi 6, 6, 1
-; PPC32-NEXT:    stb 12, 10(3)
-; PPC32-NEXT:    addi 12, 25, 1
-; PPC32-NEXT:    stb 9, 7(3)
-; PPC32-NEXT:    addi 9, 11, 1
-; PPC32-NEXT:    stb 6, 4(3)
-; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    lbz 11, 107(1)
+; PPC32-NEXT:    lbz 12, 171(1)
+; PPC32-NEXT:    add 4, 21, 4
+; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    addi 4, 4, 1
-; PPC32-NEXT:    stb 12, 9(3)
-; PPC32-NEXT:    addi 12, 24, 1
-; PPC32-NEXT:    stb 9, 6(3)
-; PPC32-NEXT:    addi 9, 10, 1
-; PPC32-NEXT:    stb 6, 3(3)
-; PPC32-NEXT:    addi 6, 7, 1
+; PPC32-NEXT:    lbz 0, 103(1)
+; PPC32-NEXT:    add 11, 12, 11
+; PPC32-NEXT:    lbz 30, 99(1)
+; PPC32-NEXT:    lbz 23, 163(1)
+; PPC32-NEXT:    lbz 22, 167(1)
+; PPC32-NEXT:    add 30, 23, 30
+; PPC32-NEXT:    stb 4, 15(3)
+; PPC32-NEXT:    add 23, 22, 0
+; PPC32-NEXT:    addi 4, 11, 1
+; PPC32-NEXT:    stb 4, 14(3)
+; PPC32-NEXT:    addi 4, 23, 1
+; PPC32-NEXT:    stb 4, 13(3)
+; PPC32-NEXT:    addi 4, 30, 1
+; PPC32-NEXT:    stb 4, 12(3)
+; PPC32-NEXT:    addi 4, 29, 1
+; PPC32-NEXT:    stb 4, 11(3)
+; PPC32-NEXT:    addi 4, 28, 1
+; PPC32-NEXT:    stb 4, 10(3)
+; PPC32-NEXT:    addi 4, 27, 1
+; PPC32-NEXT:    stb 4, 9(3)
+; PPC32-NEXT:    addi 4, 26, 1
+; PPC32-NEXT:    stb 4, 8(3)
+; PPC32-NEXT:    addi 4, 25, 1
+; PPC32-NEXT:    stb 4, 7(3)
+; PPC32-NEXT:    addi 4, 24, 1
+; PPC32-NEXT:    stb 4, 6(3)
+; PPC32-NEXT:    addi 4, 10, 1
+; PPC32-NEXT:    stb 4, 5(3)
+; PPC32-NEXT:    addi 4, 9, 1
+; PPC32-NEXT:    stb 4, 4(3)
+; PPC32-NEXT:    addi 4, 8, 1
+; PPC32-NEXT:    stb 4, 3(3)
+; PPC32-NEXT:    addi 4, 7, 1
+; PPC32-NEXT:    stb 4, 2(3)
+; PPC32-NEXT:    addi 4, 6, 1
 ; PPC32-NEXT:    stb 4, 1(3)
 ; PPC32-NEXT:    addi 4, 5, 1
-; PPC32-NEXT:    stb 12, 8(3)
-; PPC32-NEXT:    stb 9, 5(3)
-; PPC32-NEXT:    stb 6, 2(3)
 ; PPC32-NEXT:    stb 4, 0(3)
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
@@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC64BE:       # %bb.0:
 ; PPC64BE-NEXT:    std 21, -88(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    lbz 21, 207(1)
-; PPC64BE-NEXT:    lbz 11, 199(1)
-; PPC64BE-NEXT:    lbz 12, 191(1)
-; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 22, -80(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 0, 183(1)
+; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    lbz 22, 199(1)
+; PPC64BE-NEXT:    lbz 23, 191(1)
 ; PPC64BE-NEXT:    add 6, 21, 6
 ; PPC64BE-NEXT:    lbz 21, 231(1)
-; PPC64BE-NEXT:    add 5, 11, 5
-; PPC64BE-NEXT:    lbz 11, 223(1)
-; PPC64BE-NEXT:    add 4, 12, 4
-; PPC64BE-NEXT:    lbz 12, 215(1)
-; PPC64BE-NEXT:    lbz 23, 127(1)
+; PPC64BE-NEXT:    add 5, 22, 5
+; PPC64BE-NEXT:    lbz 22, 223(1)
+; PPC64BE-NEXT:    add 4, 23, 4
+; PPC64BE-NEXT:    lbz 23, 215(1)
 ; PPC64BE-NEXT:    add 9, 21, 9
+; PPC64BE-NEXT:    lbz 25, 127(1)
+; PPC64BE-NEXT:    add 8, 22, 8
 ; PPC64BE-NEXT:    lbz 21, 255(1)
-; PPC64BE-NEXT:    lbz 22, 119(1)
-; PPC64BE-NEXT:    add 8, 11, 8
-; PPC64BE-NEXT:    lbz 11, 247(1)
-; PPC64BE-NEXT:    add 7, 12, 7
-; PPC64BE-NEXT:    lbz 12, 239(1)
-; PPC64BE-NEXT:    lbz 26, 151(1)
-; PPC64BE-NEXT:    add 23, 21, 23
-; PPC64BE-NEXT:    lbz 21, 279(1)
-; PPC64BE-NEXT:    lbz 25, 143(1)
-; PPC64BE-NEXT:    add 11, 11, 22
-; PPC64BE-NEXT:    lbz 22, 271(1)
-; PPC64BE-NEXT:    lbz 24, 135(1)
-; PPC64BE-NEXT:    add 10, 12, 10
-; PPC64BE-NEXT:    lbz 12, 263(1)
-; PPC64BE-NEXT:    lbz 30, 175(1)
-; PPC64BE-NEXT:    lbz 29, 303(1)
-; PPC64BE-NEXT:    add 26, 21, 26
-; PPC64BE-NEXT:    lbz 21, 311(1)
-; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 25, 22, 25
-; PPC64BE-NEXT:    lbz 28, 167(1)
-; PPC64BE-NEXT:    lbz 22, 295(1)
-; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 12, 12, 24
-; PPC64BE-NEXT:    lbz 27, 159(1)
-; PPC64BE-NEXT:    lbz 24, 287(1)
-; PPC64BE-NEXT:    add 30, 29, 30
-; PPC64BE-NEXT:    add 29, 21, 0
-; PPC64BE-NEXT:    addi 0, 29, 1
-; PPC64BE-NEXT:    add 28, 22, 28
-; PPC64BE-NEXT:    stb 0, 15(3)
-; PPC64BE-NEXT:    addi 0, 30, 1
-; PPC64BE-NEXT:    add 27, 24, 27
-; PPC64BE-NEXT:    stb 0, 14(3)
-; PPC64BE-NEXT:    addi 0, 28, 1
-; PPC64BE-NEXT:    stb 0, 13(3)
-; PPC64BE-NEXT:    addi 0, 27, 1
-; PPC64BE-NEXT:    stb 0, 12(3)
-; PPC64BE-NEXT:    addi 0, 26, 1
-; PPC64BE-NEXT:    addi 12, 12, 1
-; PPC64BE-NEXT:    stb 0, 11(3)
-; PPC64BE-NEXT:    addi 0, 25, 1
-; PPC64BE-NEXT:    stb 12, 9(3)
-; PPC64BE-NEXT:    addi 12, 23, 1
-; PPC64BE-NEXT:    addi 11, 11, 1
-; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    add 7, 23, 7
+; PPC64BE-NEXT:    lbz 24, 119(1)
 ; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    lbz 22, 247(1)
+; PPC64BE-NEXT:    add 25, 21, 25
+; PPC64BE-NEXT:    lbz 23, 239(1)
 ; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    lbz 28, 151(1)
+; PPC64BE-NEXT:    add 24, 22, 24
+; PPC64BE-NEXT:    lbz 21, 279(1)
+; PPC64BE-NEXT:    add 10, 23, 10
+; PPC64BE-NEXT:    lbz 27, 143(1)
+; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    lbz 22, 271(1)
+; PPC64BE-NEXT:    add 28, 21, 28
+; PPC64BE-NEXT:    lbz 26, 135(1)
 ; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    lbz 23, 263(1)
+; PPC64BE-NEXT:    add 27, 22, 27
+; PPC64BE-NEXT:    lbz 11, 183(1)
 ; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    lbz 21, 311(1)
+; PPC64BE-NEXT:    add 26, 23, 26
+; PPC64BE-NEXT:    lbz 12, 175(1)
 ; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    lbz 0, 303(1)
+; PPC64BE-NEXT:    add 11, 21, 11
+; PPC64BE-NEXT:    lbz 30, 167(1)
+; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    lbz 22, 295(1)
+; PPC64BE-NEXT:    add 12, 0, 12
+; PPC64BE-NEXT:    lbz 29, 159(1)
 ; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    stb 0, 10(3)
-; PPC64BE-NEXT:    stb 12, 8(3)
+; PPC64BE-NEXT:    lbz 23, 287(1)
+; PPC64BE-NEXT:    add 30, 22, 30
+; PPC64BE-NEXT:    stb 11, 15(3)
+; PPC64BE-NEXT:    addi 11, 12, 1
+; PPC64BE-NEXT:    add 29, 23, 29
+; PPC64BE-NEXT:    stb 11, 14(3)
+; PPC64BE-NEXT:    addi 11, 30, 1
+; PPC64BE-NEXT:    stb 11, 13(3)
+; PPC64BE-NEXT:    addi 11, 29, 1
+; PPC64BE-NEXT:    stb 11, 12(3)
+; PPC64BE-NEXT:    addi 11, 28, 1
+; PPC64BE-NEXT:    stb 11, 11(3)
+; PPC64BE-NEXT:    addi 11, 27, 1
+; PPC64BE-NEXT:    stb 11, 10(3)
+; PPC64BE-NEXT:    addi 11, 26, 1
+; PPC64BE-NEXT:    stb 11, 9(3)
+; PPC64BE-NEXT:    addi 11, 25, 1
+; PPC64BE-NEXT:    stb 11, 8(3)
+; PPC64BE-NEXT:    addi 11, 24, 1
 ; PPC64BE-NEXT:    stb 11, 7(3)
 ; PPC64BE-NEXT:    stb 10, 6(3)
 ; PPC64BE-NEXT:    stb 9, 5(3)
@@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; PPC32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lhz 11, 50(1)
-; PPC32-NEXT:    lhz 12, 46(1)
-; PPC32-NEXT:    lhz 0, 42(1)
-; PPC32-NEXT:    lhz 30, 70(1)
-; PPC32-NEXT:    lhz 29, 66(1)
-; PPC32-NEXT:    lhz 28, 62(1)
-; PPC32-NEXT:    lhz 27, 58(1)
+; PPC32-NEXT:    lhz 11, 70(1)
+; PPC32-NEXT:    lhz 12, 66(1)
+; PPC32-NEXT:    lhz 0, 62(1)
+; PPC32-NEXT:    add 10, 11, 10
+; PPC32-NEXT:    lhz 30, 58(1)
+; PPC32-NEXT:    add 9, 12, 9
+; PPC32-NEXT:    lhz 29, 50(1)
+; PPC32-NEXT:    add 8, 0, 8
+; PPC32-NEXT:    lhz 28, 42(1)
+; PPC32-NEXT:    add 7, 30, 7
+; PPC32-NEXT:    lhz 27, 46(1)
+; PPC32-NEXT:    add 5, 29, 5
 ; PPC32-NEXT:    lhz 26, 54(1)
-; PPC32-NEXT:    add 3, 0, 3
-; PPC32-NEXT:    add 4, 12, 4
-; PPC32-NEXT:    add 5, 11, 5
-; PPC32-NEXT:    add 6, 26, 6
-; PPC32-NEXT:    add 7, 27, 7
-; PPC32-NEXT:    add 8, 28, 8
-; PPC32-NEXT:    add 9, 29, 9
-; PPC32-NEXT:    add 10, 30, 10
+; PPC32-NEXT:    add 3, 28, 3
+; PPC32-NEXT:    add 4, 27, 4
 ; PPC32-NEXT:    addi 3, 3, 1
+; PPC32-NEXT:    add 6, 26, 6
 ; PPC32-NEXT:    addi 4, 4, 1
 ; PPC32-NEXT:    addi 5, 5, 1
 ; PPC32-NEXT:    addi 6, 6, 1
@@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 11, 142(1)
-; PPC64BE-NEXT:    lhz 12, 134(1)
-; PPC64BE-NEXT:    lhz 0, 126(1)
-; PPC64BE-NEXT:    lhz 30, 118(1)
-; PPC64BE-NEXT:    lhz 29, 182(1)
-; PPC64BE-NEXT:    lhz 28, 174(1)
-; PPC64BE-NEXT:    lhz 27, 166(1)
-; PPC64BE-NEXT:    lhz 26, 158(1)
+; PPC64BE-NEXT:    lhz 11, 118(1)
+; PPC64BE-NEXT:    lhz 12, 182(1)
+; PPC64BE-NEXT:    lhz 0, 174(1)
+; PPC64BE-NEXT:    lhz 30, 166(1)
+; PPC64BE-NEXT:    add 11, 12, 11
+; PPC64BE-NEXT:    lhz 29, 158(1)
+; PPC64BE-NEXT:    add 10, 0, 10
+; PPC64BE-NEXT:    lhz 28, 142(1)
+; PPC64BE-NEXT:    add 9, 30, 9
+; PPC64BE-NEXT:    lhz 27, 126(1)
+; PPC64BE-NEXT:    add 8, 29, 8
+; PPC64BE-NEXT:    lhz 26, 134(1)
+; PPC64BE-NEXT:    add 6, 28, 6
 ; PPC64BE-NEXT:    lhz 25, 150(1)
-; PPC64BE-NEXT:    add 4, 0, 4
-; PPC64BE-NEXT:    add 5, 12, 5
-; PPC64BE-NEXT:    add 6, 11, 6
+; PPC64BE-NEXT:    add 4, 27, 4
+; PPC64BE-NEXT:    add 5, 26, 5
+; PPC64BE-NEXT:    addi 11, 11, 1
 ; PPC64BE-NEXT:    add 7, 25, 7
-; PPC64BE-NEXT:    add 8, 26, 8
-; PPC64BE-NEXT:    add 9, 27, 9
-; PPC64BE-NEXT:    add 10, 28, 10
-; PPC64BE-NEXT:    add 11, 29, 30
-; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    addi 5, 5, 1
-; PPC64BE-NEXT:    addi 6, 6, 1
-; PPC64BE-NEXT:    addi 7, 7, 1
-; PPC64BE-NEXT:    addi 8, 8, 1
-; PPC64BE-NEXT:    addi 9, 9, 1
 ; PPC64BE-NEXT:    addi 10, 10, 1
-; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    sth 11, 14(3)
 ; PPC64BE-NEXT:    sth 10, 12(3)
 ; PPC64BE-NEXT:    sth 9, 10(3)

diff  --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
index c0a8a76c7f1a..815d5b7443e4 100644
--- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
@@ -5,23 +5,23 @@
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC64-LABEL: muloti_test:
 ; PPC64:       # %bb.0: # %start
+; PPC64-NEXT:    mulhdu. 8, 3, 6
+; PPC64-NEXT:    mcrf 1, 0
 ; PPC64-NEXT:    mulld 8, 5, 4
-; PPC64-NEXT:    cmpdi 5, 3, 0
-; PPC64-NEXT:    mulhdu. 9, 3, 6
+; PPC64-NEXT:    cmpdi 3, 0
 ; PPC64-NEXT:    mulld 3, 3, 6
-; PPC64-NEXT:    mcrf 1, 0
+; PPC64-NEXT:    cmpdi 5, 5, 0
 ; PPC64-NEXT:    add 3, 3, 8
-; PPC64-NEXT:    cmpdi 5, 0
-; PPC64-NEXT:    crnor 20, 2, 22
-; PPC64-NEXT:    cmpldi 3, 0
+; PPC64-NEXT:    crnor 20, 22, 2
 ; PPC64-NEXT:    mulhdu 8, 4, 6
+; PPC64-NEXT:    cmpldi 3, 0
 ; PPC64-NEXT:    add 3, 8, 3
 ; PPC64-NEXT:    cmpld 6, 3, 8
 ; PPC64-NEXT:    crandc 21, 24, 2
 ; PPC64-NEXT:    crorc 20, 20, 6
-; PPC64-NEXT:    li 7, 1
 ; PPC64-NEXT:    mulhdu. 5, 5, 4
 ; PPC64-NEXT:    crorc 20, 20, 2
+; PPC64-NEXT:    li 7, 1
 ; PPC64-NEXT:    crnor 20, 20, 21
 ; PPC64-NEXT:    mulld 4, 4, 6
 ; PPC64-NEXT:    bc 12, 20, .LBB0_2
@@ -38,13 +38,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC32-NEXT:    stw 0, 4(1)
 ; PPC32-NEXT:    stwu 1, -80(1)
 ; PPC32-NEXT:    stw 26, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mfcr 12
 ; PPC32-NEXT:    stw 27, 60(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr 27, 4
 ; PPC32-NEXT:    stw 29, 68(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr 29, 7
 ; PPC32-NEXT:    stw 30, 72(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mfcr 12
 ; PPC32-NEXT:    mr 30, 8
-; PPC32-NEXT:    mr 29, 7
-; PPC32-NEXT:    mr 27, 4
 ; PPC32-NEXT:    mr 26, 3
 ; PPC32-NEXT:    li 3, 0
 ; PPC32-NEXT:    li 4, 0
@@ -54,30 +54,36 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC32-NEXT:    stw 21, 36(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 22, 40(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 23, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr 23, 6
 ; PPC32-NEXT:    stw 24, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mr 24, 5
 ; PPC32-NEXT:    stw 25, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 28, 64(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    mr 25, 10
-; PPC32-NEXT:    stw 12, 28(1)
+; PPC32-NEXT:    stw 28, 64(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    mr 28, 9
-; PPC32-NEXT:    mr 23, 6
-; PPC32-NEXT:    mr 24, 5
+; PPC32-NEXT:    stw 12, 28(1)
 ; PPC32-NEXT:    bl __multi3
 ; PPC32-NEXT:    mr 7, 4
 ; PPC32-NEXT:    mullw 4, 24, 30
+; PPC32-NEXT:    cmpwi 5, 24, 0
+; PPC32-NEXT:    cmpwi 6, 26, 0
+; PPC32-NEXT:    cmpwi 7, 28, 0
+; PPC32-NEXT:    crnor 9, 30, 26
 ; PPC32-NEXT:    mullw 8, 29, 23
-; PPC32-NEXT:    mullw 10, 28, 27
-; PPC32-NEXT:    mullw 11, 26, 25
+; PPC32-NEXT:    add 21, 8, 4
+; PPC32-NEXT:    mullw 11, 28, 27
+; PPC32-NEXT:    mullw 12, 26, 25
+; PPC32-NEXT:    add 11, 12, 11
+; PPC32-NEXT:    cmplwi 7, 11, 0
 ; PPC32-NEXT:    mulhwu 9, 30, 23
-; PPC32-NEXT:    mulhwu 12, 27, 25
+; PPC32-NEXT:    add 12, 9, 21
+; PPC32-NEXT:    cmplw 6, 12, 9
+; PPC32-NEXT:    mulhwu 10, 27, 25
 ; PPC32-NEXT:    mullw 0, 30, 23
 ; PPC32-NEXT:    mullw 22, 27, 25
-; PPC32-NEXT:    add 21, 8, 4
-; PPC32-NEXT:    add 10, 11, 10
 ; PPC32-NEXT:    addc 4, 22, 0
-; PPC32-NEXT:    add 11, 9, 21
-; PPC32-NEXT:    add 0, 12, 10
-; PPC32-NEXT:    adde 8, 0, 11
+; PPC32-NEXT:    add 0, 10, 11
+; PPC32-NEXT:    adde 8, 0, 12
 ; PPC32-NEXT:    addc 4, 7, 4
 ; PPC32-NEXT:    adde 8, 3, 8
 ; PPC32-NEXT:    xor 22, 4, 7
@@ -85,21 +91,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC32-NEXT:    or. 22, 22, 20
 ; PPC32-NEXT:    mcrf 1, 0
 ; PPC32-NEXT:    cmpwi 29, 0
-; PPC32-NEXT:    cmpwi 5, 24, 0
-; PPC32-NEXT:    cmpwi 6, 26, 0
-; PPC32-NEXT:    cmpwi 7, 28, 0
 ; PPC32-NEXT:    crnor 8, 22, 2
 ; PPC32-NEXT:    mulhwu. 23, 29, 23
-; PPC32-NEXT:    crnor 9, 30, 26
 ; PPC32-NEXT:    mcrf 5, 0
 ; PPC32-NEXT:    cmplwi 21, 0
-; PPC32-NEXT:    cmplw 6, 11, 9
-; PPC32-NEXT:    cmplwi 7, 10, 0
 ; PPC32-NEXT:    crandc 10, 24, 2
-; PPC32-NEXT:    cmplw 3, 0, 12
+; PPC32-NEXT:    cmplw 3, 0, 10
+; PPC32-NEXT:    crandc 11, 12, 30
 ; PPC32-NEXT:    mulhwu. 9, 24, 30
 ; PPC32-NEXT:    mcrf 6, 0
-; PPC32-NEXT:    crandc 11, 12, 30
 ; PPC32-NEXT:    cmplw 4, 7
 ; PPC32-NEXT:    cmplw 7, 8, 3
 ; PPC32-NEXT:    crand 12, 30, 0

diff  --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll
index 7c048ff37108..0e6626bbce23 100644
--- a/llvm/test/CodeGen/PowerPC/vec_splat.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll
@@ -10,17 +10,17 @@
 define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
 ; G3-LABEL: splat:
 ; G3:       # %bb.0:
-; G3-NEXT:    lfs 0, 0(4)
+; G3-NEXT:    lfs 0, 12(4)
 ; G3-NEXT:    lfs 2, 8(4)
 ; G3-NEXT:    lfs 3, 4(4)
-; G3-NEXT:    lfs 4, 12(4)
 ; G3-NEXT:    fadds 0, 0, 1
-; G3-NEXT:    fadds 2, 2, 1
-; G3-NEXT:    fadds 3, 3, 1
-; G3-NEXT:    fadds 1, 4, 1
-; G3-NEXT:    stfs 1, 12(3)
-; G3-NEXT:    stfs 2, 8(3)
-; G3-NEXT:    stfs 3, 4(3)
+; G3-NEXT:    lfs 4, 0(4)
+; G3-NEXT:    stfs 0, 12(3)
+; G3-NEXT:    fadds 0, 2, 1
+; G3-NEXT:    stfs 0, 8(3)
+; G3-NEXT:    fadds 0, 3, 1
+; G3-NEXT:    stfs 0, 4(3)
+; G3-NEXT:    fadds 0, 4, 1
 ; G3-NEXT:    stfs 0, 0(3)
 ; G3-NEXT:    blr
 ;
@@ -49,18 +49,18 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
 define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
 ; G3-LABEL: splat_i4:
 ; G3:       # %bb.0:
-; G3-NEXT:    lwz 6, 0(4)
+; G3-NEXT:    lwz 6, 12(4)
 ; G3-NEXT:    lwz 7, 8(4)
 ; G3-NEXT:    lwz 8, 4(4)
-; G3-NEXT:    lwz 4, 12(4)
 ; G3-NEXT:    add 6, 6, 5
-; G3-NEXT:    add 8, 8, 5
-; G3-NEXT:    add 7, 7, 5
+; G3-NEXT:    lwz 4, 0(4)
+; G3-NEXT:    stw 6, 12(3)
+; G3-NEXT:    add 6, 7, 5
+; G3-NEXT:    stw 6, 8(3)
+; G3-NEXT:    add 6, 8, 5
 ; G3-NEXT:    add 4, 4, 5
-; G3-NEXT:    stw 4, 12(3)
-; G3-NEXT:    stw 7, 8(3)
-; G3-NEXT:    stw 8, 4(3)
-; G3-NEXT:    stw 6, 0(3)
+; G3-NEXT:    stw 6, 4(3)
+; G3-NEXT:    stw 4, 0(3)
 ; G3-NEXT:    blr
 ;
 ; G5-LABEL: splat_i4:
@@ -88,18 +88,18 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
 define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
 ; G3-LABEL: splat_imm_i32:
 ; G3:       # %bb.0:
-; G3-NEXT:    lwz 5, 0(4)
+; G3-NEXT:    lwz 5, 12(4)
 ; G3-NEXT:    lwz 6, 8(4)
 ; G3-NEXT:    lwz 7, 4(4)
-; G3-NEXT:    lwz 4, 12(4)
 ; G3-NEXT:    addi 5, 5, -1
-; G3-NEXT:    addi 7, 7, -1
-; G3-NEXT:    addi 6, 6, -1
+; G3-NEXT:    lwz 4, 0(4)
+; G3-NEXT:    stw 5, 12(3)
+; G3-NEXT:    addi 5, 6, -1
+; G3-NEXT:    stw 5, 8(3)
+; G3-NEXT:    addi 5, 7, -1
 ; G3-NEXT:    addi 4, 4, -1
-; G3-NEXT:    stw 4, 12(3)
-; G3-NEXT:    stw 6, 8(3)
-; G3-NEXT:    stw 7, 4(3)
-; G3-NEXT:    stw 5, 0(3)
+; G3-NEXT:    stw 5, 4(3)
+; G3-NEXT:    stw 4, 0(3)
 ; G3-NEXT:    blr
 ;
 ; G5-LABEL: splat_imm_i32:
@@ -118,22 +118,22 @@ define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
 define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
 ; G3-LABEL: splat_imm_i16:
 ; G3:       # %bb.0:
-; G3-NEXT:    lwz 5, 0(4)
-; G3-NEXT:    lwz 6, 8(4)
+; G3-NEXT:    lwz 5, 8(4)
+; G3-NEXT:    lwz 6, 0(4)
 ; G3-NEXT:    lwz 7, 4(4)
-; G3-NEXT:    lwz 4, 12(4)
 ; G3-NEXT:    addi 5, 5, 1
-; G3-NEXT:    addi 7, 7, 1
+; G3-NEXT:    lwz 4, 12(4)
 ; G3-NEXT:    addi 6, 6, 1
+; G3-NEXT:    addi 7, 7, 1
 ; G3-NEXT:    addi 4, 4, 1
-; G3-NEXT:    addis 5, 5, 1
-; G3-NEXT:    addis 7, 7, 1
-; G3-NEXT:    addis 6, 6, 1
 ; G3-NEXT:    addis 4, 4, 1
 ; G3-NEXT:    stw 4, 12(3)
-; G3-NEXT:    stw 6, 8(3)
-; G3-NEXT:    stw 7, 4(3)
-; G3-NEXT:    stw 5, 0(3)
+; G3-NEXT:    addis 4, 5, 1
+; G3-NEXT:    stw 4, 8(3)
+; G3-NEXT:    addis 4, 7, 1
+; G3-NEXT:    stw 4, 4(3)
+; G3-NEXT:    addis 4, 6, 1
+; G3-NEXT:    stw 4, 0(3)
 ; G3-NEXT:    blr
 ;
 ; G5-LABEL: splat_imm_i16:
@@ -189,58 +189,60 @@ define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ; G3-LABEL: spltish:
 ; G3:       # %bb.0:
 ; G3-NEXT:    stwu 1, -48(1)
+; G3-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; G3-NEXT:    lbz 5, 0(4)
+; G3-NEXT:    lbz 30, 15(4)
+; G3-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; G3-NEXT:    lbz 29, 13(4)
+; G3-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
+; G3-NEXT:    lbz 28, 11(4)
+; G3-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
+; G3-NEXT:    lbz 27, 9(4)
+; G3-NEXT:    stw 24, 16(1) # 4-byte Folded Spill
 ; G3-NEXT:    stw 25, 20(1) # 4-byte Folded Spill
 ; G3-NEXT:    stw 26, 24(1) # 4-byte Folded Spill
-; G3-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
-; G3-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
-; G3-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
-; G3-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
-; G3-NEXT:    lbz 5, 5(4)
-; G3-NEXT:    lbz 6, 3(4)
-; G3-NEXT:    lbz 7, 1(4)
-; G3-NEXT:    lbz 8, 0(4)
-; G3-NEXT:    lbz 9, 2(4)
-; G3-NEXT:    lbz 10, 4(4)
-; G3-NEXT:    lbz 11, 6(4)
-; G3-NEXT:    lbz 12, 8(4)
-; G3-NEXT:    lbz 0, 10(4)
-; G3-NEXT:    addi 7, 7, -15
-; G3-NEXT:    lbz 30, 12(4)
-; G3-NEXT:    lbz 29, 14(4)
-; G3-NEXT:    lbz 28, 15(4)
-; G3-NEXT:    lbz 27, 13(4)
-; G3-NEXT:    lbz 26, 11(4)
-; G3-NEXT:    lbz 25, 9(4)
-; G3-NEXT:    addi 6, 6, -15
-; G3-NEXT:    lbz 4, 7(4)
-; G3-NEXT:    addi 5, 5, -15
-; G3-NEXT:    addi 25, 25, -15
-; G3-NEXT:    addi 26, 26, -15
-; G3-NEXT:    addi 4, 4, -15
-; G3-NEXT:    addi 27, 27, -15
-; G3-NEXT:    addi 28, 28, -15
-; G3-NEXT:    stb 29, 14(3)
-; G3-NEXT:    stb 30, 12(3)
-; G3-NEXT:    stb 0, 10(3)
-; G3-NEXT:    stb 12, 8(3)
-; G3-NEXT:    stb 11, 6(3)
-; G3-NEXT:    stb 10, 4(3)
-; G3-NEXT:    stb 9, 2(3)
-; G3-NEXT:    stb 8, 0(3)
-; G3-NEXT:    stb 28, 15(3)
-; G3-NEXT:    stb 27, 13(3)
-; G3-NEXT:    stb 26, 11(3)
-; G3-NEXT:    stb 25, 9(3)
-; G3-NEXT:    stb 4, 7(3)
+; G3-NEXT:    lbz 6, 2(4)
+; G3-NEXT:    lbz 7, 4(4)
+; G3-NEXT:    lbz 8, 6(4)
+; G3-NEXT:    lbz 9, 8(4)
+; G3-NEXT:    lbz 10, 10(4)
+; G3-NEXT:    lbz 11, 12(4)
+; G3-NEXT:    lbz 12, 14(4)
+; G3-NEXT:    lbz 26, 7(4)
+; G3-NEXT:    lbz 25, 5(4)
+; G3-NEXT:    lbz 24, 3(4)
+; G3-NEXT:    lbz 4, 1(4)
+; G3-NEXT:    stb 5, 0(3)
+; G3-NEXT:    addi 5, 30, -15
+; G3-NEXT:    stb 5, 15(3)
+; G3-NEXT:    addi 5, 29, -15
+; G3-NEXT:    stb 5, 13(3)
+; G3-NEXT:    addi 5, 28, -15
+; G3-NEXT:    stb 5, 11(3)
+; G3-NEXT:    addi 5, 27, -15
+; G3-NEXT:    stb 5, 9(3)
+; G3-NEXT:    addi 5, 26, -15
+; G3-NEXT:    stb 5, 7(3)
+; G3-NEXT:    addi 5, 25, -15
 ; G3-NEXT:    stb 5, 5(3)
-; G3-NEXT:    stb 6, 3(3)
-; G3-NEXT:    stb 7, 1(3)
+; G3-NEXT:    addi 5, 24, -15
+; G3-NEXT:    addi 4, 4, -15
+; G3-NEXT:    stb 12, 14(3)
+; G3-NEXT:    stb 11, 12(3)
+; G3-NEXT:    stb 10, 10(3)
+; G3-NEXT:    stb 9, 8(3)
+; G3-NEXT:    stb 8, 6(3)
+; G3-NEXT:    stb 7, 4(3)
+; G3-NEXT:    stb 6, 2(3)
+; G3-NEXT:    stb 5, 3(3)
+; G3-NEXT:    stb 4, 1(3)
 ; G3-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
 ; G3-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
 ; G3-NEXT:    lwz 28, 32(1) # 4-byte Folded Reload
 ; G3-NEXT:    lwz 27, 28(1) # 4-byte Folded Reload
 ; G3-NEXT:    lwz 26, 24(1) # 4-byte Folded Reload
 ; G3-NEXT:    lwz 25, 20(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 24, 16(1) # 4-byte Folded Reload
 ; G3-NEXT:    addi 1, 1, 48
 ; G3-NEXT:    blr
 ;


        


More information about the llvm-commits mailing list