[llvm] TargetSchedule: correct latency by cycles elapsed from def to use (PR #74088)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 1 07:05:36 PST 2023


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/74088

The getOperandLatency function falls back to the default def/instruction latency in many cases, but this fallback latency doesn't account for the cycles elapsed between the def and the use. This is a hard problem to solve in general, since we don't have the scheduling model in the fallback cases. As a conservative approximation, set the fallback latency to zero when it is much less than the distance between the def and the use, i.e. when the latency multiplied by an experimentally chosen factor of 22 is still less than that distance. Improvements are observed on standard benchmarks.

>From d8ec3838415a00adc981677e1a9394d54ebcbac2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Tue, 28 Nov 2023 13:29:45 +0000
Subject: [PATCH] TargetSchedule: correct latency by cycles elapsed from def to
 use

The getOperandLatency function falls back to the default def/instruction
latency in many cases, but this fallback latency doesn't account for the
cycles elapsed between the def and the use. This is a hard problem to
solve in general, since we don't have the scheduling model in the
fallback cases. As a conservative approximation, set the fallback
latency to zero when it is much less than the distance between the def
and the use, i.e. when the latency multiplied by an experimentally
chosen factor of 22 is still less than that distance. Improvements are
observed on standard benchmarks.
---
 llvm/lib/CodeGen/TargetSchedule.cpp           |   27 +-
 .../CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll  |   19 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |  394 +-
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |  182 +-
 .../CodeGen/AMDGPU/GlobalISel/udiv.i32.ll     |  224 +-
 .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll     |  861 +--
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     |  836 +--
 .../CodeGen/AMDGPU/calling-conventions.ll     |  307 +-
 llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll   |    8 +-
 llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll   |    8 +-
 llvm/test/CodeGen/AMDGPU/load-constant-i1.ll  | 1632 +++--
 llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 1026 ++-
 llvm/test/CodeGen/AMDGPU/load-constant-i32.ll |  287 +-
 llvm/test/CodeGen/AMDGPU/load-constant-i8.ll  | 1260 ++--
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   |  795 ++-
 llvm/test/CodeGen/AMDGPU/load-global-i32.ll   |  231 +-
 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll   |  126 +-
 llvm/test/CodeGen/PowerPC/aix-cc-abi.ll       |   82 +-
 llvm/test/CodeGen/PowerPC/inc-of-add.ll       |  188 +-
 llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll   |    4 +-
 llvm/test/CodeGen/PowerPC/sat-add.ll          |    2 +-
 llvm/test/CodeGen/PowerPC/sub-of-not.ll       |  188 +-
 llvm/test/CodeGen/PowerPC/testBitReverse.ll   |   52 +-
 .../umulo-128-legalisation-lowering.ll        |  153 +-
 .../PowerPC/vector-popcnt-128-ult-ugt.ll      | 5856 ++++++++---------
 .../PowerPC/wide-scalar-shift-legalization.ll |  124 +-
 llvm/test/CodeGen/RISCV/bswap-bitreverse.ll   |   25 +-
 llvm/test/CodeGen/RISCV/mul.ll                |    4 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    |  177 +-
 .../rvv/fixed-vectors-int-explodevector.ll    |   96 +-
 .../RISCV/rvv/fixed-vectors-stepvector.ll     |   28 +-
 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll    |   76 +-
 ...ve-complex-deinterleaving-uniform-cases.ll |   10 +-
 .../CodeGen/Thumb2/mve-fpclamptosat_vec.ll    |   32 +-
 .../CodeGen/Thumb2/mve-fptosi-sat-vector.ll   |    6 +-
 .../CodeGen/Thumb2/mve-fptoui-sat-vector.ll   |    2 +-
 llvm/test/CodeGen/Thumb2/mve-phireg.ll        |   10 +-
 llvm/test/CodeGen/Thumb2/mve-shuffle.ll       |    4 +-
 llvm/test/CodeGen/Thumb2/mve-vldst4.ll        |    2 +-
 llvm/test/CodeGen/Thumb2/mve-vst3.ll          |   16 +-
 llvm/test/CodeGen/Thumb2/mve-vst4.ll          |    6 +-
 llvm/test/CodeGen/X86/mul-constant-result.ll  |  127 +-
 llvm/test/CodeGen/X86/sad.ll                  |    7 +-
 .../CodeGen/X86/x86-interleaved-access.ll     |    4 +-
 44 files changed, 7759 insertions(+), 7745 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index a25d4ff78f4d967..7c353eb196a44cf 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -174,8 +174,31 @@ unsigned TargetSchedModel::computeOperandLatency(
   const MachineInstr *DefMI, unsigned DefOperIdx,
   const MachineInstr *UseMI, unsigned UseOperIdx) const {
 
-  const unsigned InstrLatency = computeInstrLatency(DefMI);
-  const unsigned DefaultDefLatency = TII->defaultDefLatency(SchedModel, *DefMI);
+  unsigned InstrLatency = computeInstrLatency(DefMI);
+  unsigned DefaultDefLatency = TII->defaultDefLatency(SchedModel, *DefMI);
+
+  // We fall back to computing the default latency in many cases. However, this
+  // doesn't take into account the distance between DefMI and UseMI, which would
+  // approximately be the number of cycles elapsed between the def and the use
+  // (a rough estimate, since we don't have the SchedModel). A conservative
+  // approximation would then be to check that this fallback latency is much
+  // less than this distance, and set it to zero if so.
+  const MachineBasicBlock *DefBB = DefMI->getParent();
+  const MachineBasicBlock *UseBB = UseMI ? UseMI->getParent() : nullptr;
+  if (DefBB && DefBB == UseBB) {
+    auto DefIt = find_if(DefBB->instrs(), [DefMI](const MachineInstr &MI) {
+      return &MI == DefMI;
+    });
+    auto UseIt = find_if(DefBB->instrs(), [UseMI](const MachineInstr &MI) {
+      return &MI == UseMI;
+    });
+    unsigned DefUseDist = std::distance(DefIt, UseIt) - 1;
+    const unsigned MulFactor = 22; // Chosen experimentally
+    if (MulFactor * InstrLatency < DefUseDist)
+      InstrLatency = 0;
+    if (MulFactor * DefaultDefLatency < DefUseDist)
+      DefaultDefLatency = 0;
+  }
 
   if (!hasInstrSchedModel() && !hasInstrItineraries())
     return InstrLatency;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
index 6b054556135156f..4a2ee6aed39ebbb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
@@ -17,12 +17,10 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    v_mov_b32_e32 v8, s6
 ; LOOP-NEXT:  .LBB0_1: ; %load-store-loop
 ; LOOP-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    buffer_load_ubyte v9, v[4:5], s[4:7], 0 addr64
-; LOOP-NEXT:    s_waitcnt expcnt(6)
 ; LOOP-NEXT:    buffer_load_ubyte v10, v[4:5], s[4:7], 0 addr64 offset:1
-; LOOP-NEXT:    s_waitcnt expcnt(3)
 ; LOOP-NEXT:    buffer_load_ubyte v11, v[4:5], s[4:7], 0 addr64 offset:2
-; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    buffer_load_ubyte v12, v[4:5], s[4:7], 0 addr64 offset:3
 ; LOOP-NEXT:    buffer_load_ubyte v13, v[4:5], s[4:7], 0 addr64 offset:4
 ; LOOP-NEXT:    buffer_load_ubyte v14, v[4:5], s[4:7], 0 addr64 offset:5
@@ -87,11 +85,8 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    buffer_store_byte v11, v[6:7], s[4:7], 0 addr64 offset:8
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v11, 24, v11
-; LOOP-NEXT:    v_lshrrev_b32_e32 v19, 16, v12
-; LOOP-NEXT:    v_bfe_u32 v20, v12, 8, 8
 ; LOOP-NEXT:    buffer_store_byte v12, v[6:7], s[4:7], 0 addr64 offset:12
-; LOOP-NEXT:    s_waitcnt expcnt(0)
-; LOOP-NEXT:    v_lshrrev_b32_e32 v12, 24, v12
+; LOOP-NEXT:    v_lshrrev_b32_e32 v19, 24, v12
 ; LOOP-NEXT:    buffer_store_byte v14, v[6:7], s[4:7], 0 addr64 offset:1
 ; LOOP-NEXT:    buffer_store_byte v13, v[6:7], s[4:7], 0 addr64 offset:2
 ; LOOP-NEXT:    buffer_store_byte v9, v[6:7], s[4:7], 0 addr64 offset:3
@@ -101,9 +96,13 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    buffer_store_byte v18, v[6:7], s[4:7], 0 addr64 offset:9
 ; LOOP-NEXT:    buffer_store_byte v17, v[6:7], s[4:7], 0 addr64 offset:10
 ; LOOP-NEXT:    buffer_store_byte v11, v[6:7], s[4:7], 0 addr64 offset:11
-; LOOP-NEXT:    buffer_store_byte v20, v[6:7], s[4:7], 0 addr64 offset:13
-; LOOP-NEXT:    buffer_store_byte v19, v[6:7], s[4:7], 0 addr64 offset:14
-; LOOP-NEXT:    buffer_store_byte v12, v[6:7], s[4:7], 0 addr64 offset:15
+; LOOP-NEXT:    s_waitcnt expcnt(6)
+; LOOP-NEXT:    v_lshrrev_b32_e32 v9, 16, v12
+; LOOP-NEXT:    s_waitcnt expcnt(3)
+; LOOP-NEXT:    v_bfe_u32 v10, v12, 8, 8
+; LOOP-NEXT:    buffer_store_byte v19, v[6:7], s[4:7], 0 addr64 offset:15
+; LOOP-NEXT:    buffer_store_byte v10, v[6:7], s[4:7], 0 addr64 offset:13
+; LOOP-NEXT:    buffer_store_byte v9, v[6:7], s[4:7], 0 addr64 offset:14
 ; LOOP-NEXT:    v_add_i32_e64 v6, s[0:1], 16, v6
 ; LOOP-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, v7, s[0:1]
 ; LOOP-NEXT:    v_add_i32_e64 v4, s[0:1], 16, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 1061f0003bd4896..6329fcde8ba14ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -154,60 +154,60 @@ define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v9, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v8
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v3
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[4:5], v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v7, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32:
@@ -267,8 +267,8 @@ define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v9
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, %den
   ret <2 x i32> %result
@@ -577,60 +577,60 @@ define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v6
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v9, v4, v8
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v2
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
-; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v2
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v1, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[6:7], v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
@@ -640,60 +640,60 @@ define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; CGP-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
 ; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v2
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT:    v_xor_b32_e32 v8, v5, v6
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_xor_b32_e32 v8, v4, v6
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT:    v_xor_b32_e32 v9, v4, v7
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT:    v_xor_b32_e32 v4, v5, v7
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
-; CGP-NEXT:    v_rcp_f32_e32 v5, v5
-; CGP-NEXT:    v_rcp_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
-; CGP-NEXT:    v_mul_lo_u32 v9, v9, v7
-; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v9
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v6
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
+; CGP-NEXT:    v_rcp_f32_e32 v4, v4
+; CGP-NEXT:    v_rcp_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
+; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v7, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
-; CGP-NEXT:    v_mul_lo_u32 v10, v6, v3
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT:    v_sub_i32_e64 v7, s[4:5], v0, v2
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; CGP-NEXT:    v_mul_lo_u32 v6, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, v2
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v1, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v9, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v8
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v9
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = sdiv <2 x i32> %x, %shl.y
@@ -775,62 +775,62 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v2
+; GISEL-NEXT:    v_xor_b32_e32 v9, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v7
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v3
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v2
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v2
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v7
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v8
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v2
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[4:5], v1, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[6:7], v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v9
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32_24bit:
@@ -840,44 +840,44 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
-; CGP-NEXT:    v_rcp_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_rcp_f32_e32 v5, v5
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
+; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v5, v2
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
 ; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
   %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 1bb606f36e48d2c..8637f86d430e8f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -188,8 +188,8 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32:
@@ -243,8 +243,8 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, %den
   ret <2 x i32> %result
@@ -530,20 +530,20 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v2
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v2
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
 ; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -556,28 +556,28 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
 ; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
-; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
@@ -587,20 +587,20 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; CGP-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
 ; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
 ; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v2
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v6
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v2
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v2
 ; CGP-NEXT:    v_rcp_f32_e32 v6, v6
 ; CGP-NEXT:    v_rcp_f32_e32 v8, v8
 ; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -613,28 +613,28 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
 ; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
-; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
-; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
-; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT:    v_mul_hi_u32 v7, v0, v7
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v3
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = srem <2 x i32> %x, %shl.y
@@ -711,22 +711,22 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v2
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
@@ -739,28 +739,28 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index 6588112973f4c94..1a16b79df009ab9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -294,44 +294,44 @@ define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
 ; GISEL-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, v2
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i32_pow2_shl_denom:
@@ -339,44 +339,44 @@ define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CGP-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
 ; CGP-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
-; CGP-NEXT:    v_rcp_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_rcp_f32_e32 v5, v5
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
+; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v5, v2
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
 ; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = udiv <2 x i32> %x, %shl.y
@@ -449,44 +449,44 @@ define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, v2
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i32_24bit:
@@ -496,44 +496,44 @@ define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
 ; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
 ; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
-; CGP-NEXT:    v_rcp_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_rcp_f32_e32 v5, v5
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
+; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v5, v2
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
 ; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
 ; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
 ; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v6, vcc
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
   %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 3add708d1a6394d..f096686b54ec900 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -548,73 +548,73 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v6, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v8
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v14
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], 1, v9
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[8:9], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v18, s[10:11], 1, v13
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], 1, v8
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], 1, v9
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[8:9], v0, v14
+; GISEL-NEXT:    v_add_i32_e64 v14, s[10:11], 1, v11
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[12:13], v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v14
-; GISEL-NEXT:    v_add_i32_e64 v12, s[14:15], v21, v12
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[16:17], v2, v6
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[18:19], v0, v4
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[20:21], v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v20, v4, v10
+; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v2, v6
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[16:17], v2, v6
+; GISEL-NEXT:    v_add_i32_e64 v12, s[18:19], v21, v12
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[18:19], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[14:15]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[14:15], v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[20:21], v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v2, v4, v10
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[22:23], v0, v4
-; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], 0, v10, s[4:5]
 ; GISEL-NEXT:    v_mul_lo_u32 v4, v6, v12
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
-; GISEL-NEXT:    v_addc_u32_e64 v2, s[6:7], 0, v12, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[16:17]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v19, v4
-; GISEL-NEXT:    v_addc_u32_e64 v19, s[6:7], 0, v0, s[10:11]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], 0, v2, s[12:13]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v4, v11
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v16, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v16
-; GISEL-NEXT:    v_subb_u32_e64 v16, s[6:7], v3, v4, s[8:9]
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[22:23]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v11, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[10:11], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v5, s[4:5]
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v7
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[8:9]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v16, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v16, v6, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[18:19]
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, v16, v20, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, v12, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], v16, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[18:19]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v19, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[20:21]
+; GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v17
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[4:5], 0, v0, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v18
+; GISEL-NEXT:    v_subb_u32_e64 v18, s[4:5], v1, v2, s[8:9]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; GISEL-NEXT:    v_subb_u32_e64 v2, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, v6, s[12:13]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v18, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[22:23]
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[8:9], v1, v5, s[8:9]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[8:9]
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[16:17]
+; GISEL-NEXT:    v_cndmask_b32_e32 v16, v16, v20, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v7
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v1, v5
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v3, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[10:11], 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[8:9]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v18, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v15, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v0, v19, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v2, v17, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v9, v3, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v12, v5, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v18, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v19, s[8:9]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v1
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v11, v14, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v13, v15, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v0, v17, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v6, v4, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v8, v1, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v10, v5, s[10:11]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i64:
@@ -1010,56 +1010,107 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 }
 
 define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
-; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v4, 0x1fb03c31
-; CHECK-NEXT:    v_mov_b32_e32 v5, 0xd9528440
-; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
-; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v5
-; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v4
-; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT:    v_mul_hi_u32 v10, v1, v4
-; CHECK-NEXT:    v_mul_hi_u32 v0, v0, v5
-; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v5
-; CHECK-NEXT:    v_mul_lo_u32 v11, v3, v4
-; CHECK-NEXT:    v_mul_lo_u32 v12, v2, v5
-; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v4
-; CHECK-NEXT:    v_mul_lo_u32 v14, v3, v5
-; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v5
-; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v5
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
-; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v8
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
-; CHECK-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_udiv_v2i64_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0x1fb03c31
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0xd9528440
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v3, v5
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v4
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v10, v4
+; GISEL-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
+; GISEL-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v4, 0x1fb03c31
+; CGP-NEXT:    v_mov_b32_e32 v5, 0xd9528440
+; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v5
+; CGP-NEXT:    v_mul_hi_u32 v9, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v10, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v13, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v5
+; CGP-NEXT:    v_mul_lo_u32 v2, v2, v5
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CGP-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
+; CGP-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
 }
@@ -1248,65 +1299,65 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v9, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v10, 0
-; GISEL-NEXT:    v_lshl_b64 v[7:8], v[9:10], v4
-; GISEL-NEXT:    v_lshl_b64 v[4:5], v[9:10], v6
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v8
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v5
-; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v4
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
-; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
-; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
-; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
+; GISEL-NEXT:    v_mov_b32_e32 v7, 0x1000
+; GISEL-NEXT:    v_mov_b32_e32 v8, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], v[7:8], v4
+; GISEL-NEXT:    v_lshl_b64 v[6:7], v[7:8], v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v7
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v15, v5
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v10, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v14, 0x4f800000, v15
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v12, v12
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v13, v14
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x5f7ffffc, v12
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
+; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v12
+; GISEL-NEXT:    v_mul_f32_e32 v15, 0x2f800000, v13
 ; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
+; GISEL-NEXT:    v_trunc_f32_e32 v15, v15
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0xcf800000, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v13
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_mac_f32_e32 v13, 0xcf800000, v15
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v15
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v14
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
+; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v15
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v19, v10, v13
+; GISEL-NEXT:    v_mul_hi_u32 v20, v8, v13
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
+; GISEL-NEXT:    v_mul_lo_u32 v19, v15, v18
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
+; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v18
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v12
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
-; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v12
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
+; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v19
+; GISEL-NEXT:    v_mul_lo_u32 v21, v12, v16
 ; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
+; GISEL-NEXT:    v_mul_hi_u32 v21, v12, v19
 ; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
+; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v18, v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v16
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v17
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
-; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
+; GISEL-NEXT:    v_mul_hi_u32 v20, v12, v16
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v17
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
@@ -1322,186 +1373,186 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v19
+; GISEL-NEXT:    v_mul_hi_u32 v16, v14, v16
+; GISEL-NEXT:    v_mul_hi_u32 v17, v15, v17
+; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v13, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v12
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v18, v9, v12
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
-; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v13
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v13
+; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v17, s[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v12, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v6
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v6
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v19
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, v14, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v14, v20
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v15, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v15, v19
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v14, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v17
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v17, v13, v8
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v8
 ; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
+; GISEL-NEXT:    v_mul_hi_u32 v18, v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v15, v8
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
-; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v6, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
-; GISEL-NEXT:    v_mul_hi_u32 v21, v3, v9
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v10
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v18, v6
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v20, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v19
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v10
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v3, v10
+; GISEL-NEXT:    v_mul_lo_u32 v18, v1, v12
+; GISEL-NEXT:    v_mul_hi_u32 v19, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v13
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v14, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v15, v8, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v2, v9
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v3, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, v0, v8
+; GISEL-NEXT:    v_mul_lo_u32 v20, v1, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v21, v1, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v18, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v20, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v19
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], v10, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v14
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v19
+; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v10, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v14
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v13
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v6
-; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v8
+; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v4, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v6
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v14
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], 1, v9
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[8:9], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v18, s[10:11], 1, v13
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], 1, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v13
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], 1, v9
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[8:9], v2, v14
+; GISEL-NEXT:    v_add_i32_e64 v14, s[10:11], 1, v12
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[12:13], v15, v10
-; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v14
-; GISEL-NEXT:    v_add_i32_e64 v12, s[14:15], v21, v12
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v0, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[16:17], v2, v4
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[18:19], v0, v7
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[20:21], v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v20, v7, v10
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[22:23], v0, v7
-; GISEL-NEXT:    v_addc_u32_e32 v0, vcc, 0, v10, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v2, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v4, s[24:25], v16, v20
-; GISEL-NEXT:    v_addc_u32_e64 v7, s[6:7], 0, v12, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], v19, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[14:15]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[6:7], v4, v17
-; GISEL-NEXT:    v_subb_u32_e64 v17, s[6:7], v1, v4, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[16:17]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v17, v8
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[14:15], v17, v8
-; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, v0, s[10:11]
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[18:19]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v8
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[10:11], v1, v8
-; GISEL-NEXT:    v_addc_u32_e64 v1, s[12:13], 0, v7, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[22:23]
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
-; GISEL-NEXT:    v_subb_u32_e64 v11, vcc, v3, v2, s[8:9]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
-; GISEL-NEXT:    v_subb_u32_e64 v2, s[8:9], v2, v5, s[8:9]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v2, vcc, 0, v2, s[20:21]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v16, s[14:15]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v4, s[8:9]
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v8, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v19, s[6:7]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v13, v18, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v15, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v0, v17, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v7, v1, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v9, v3, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v12, v5, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[12:13], 1, v13
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[14:15], v0, v4
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[16:17], v0, v4
+; GISEL-NEXT:    v_add_i32_e64 v11, s[18:19], v21, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[18:19], v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[14:15]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[14:15], v2, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[20:21], v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v0, v6, v10
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[22:23], v2, v6
+; GISEL-NEXT:    v_addc_u32_e64 v2, s[4:5], 0, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v16, v0
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v0, v17
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v0, s[8:9]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v3, v0
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, v11, s[6:7]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v7
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v6, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[18:19]
+; GISEL-NEXT:    v_subb_u32_e64 v0, s[8:9], v0, v7, s[8:9]
+; GISEL-NEXT:    v_subbrev_u32_e64 v0, s[8:9], 0, v0, s[14:15]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v7
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[14:15], v0, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[20:21]
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[10:11], 0, v2, s[10:11]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[10:11], v19, v4
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[10:11], 0, v3, s[12:13]
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[22:23]
+; GISEL-NEXT:    v_add_i32_e64 v4, s[10:11], v4, v18
+; GISEL-NEXT:    v_subb_u32_e64 v18, s[10:11], v1, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[10:11], v1, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[10:11], v18, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[10:11]
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[16:17]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v18, v20, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[14:15]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v1
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v12, v14, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v15, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v2, v7, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v3, v16, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v8, v0, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v10, v4, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v5, vcc
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
@@ -1906,56 +1957,56 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v1
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v4
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v6
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v0
 ; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
 ; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
-; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v6
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v8
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
-; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v7
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
 ; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
 ; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v11
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v6
-; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
-; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v14
+; GISEL-NEXT:    v_mul_hi_u32 v20, v4, v14
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v11, v17
-; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v17
+; GISEL-NEXT:    v_mul_hi_u32 v21, v5, v17
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v12
+; GISEL-NEXT:    v_mul_lo_u32 v16, v4, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v22, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v22, v4, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
-; GISEL-NEXT:    v_mul_lo_u32 v23, v7, v13
+; GISEL-NEXT:    v_mul_lo_u32 v23, v5, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v24, v11, v13
-; GISEL-NEXT:    v_mul_hi_u32 v25, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v25, v5, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
 ; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
@@ -1985,36 +2036,36 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
 ; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v13, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v11, v13
-; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v9
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
-; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v8, v4
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v7
+; GISEL-NEXT:    v_mul_hi_u32 v21, v5, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v7
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
@@ -2041,66 +2092,66 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v14
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v12, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v4, 0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v11, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v13, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v13, v1, v4
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v6
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v4, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v17, v0, v5
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v7
-; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, v0, v7
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v20, vcc, 0, v16, vcc
@@ -2109,15 +2160,15 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_addc_u32_e32 v21, vcc, 0, v19, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v15
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
 ; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], 0, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 0, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v11
 ; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v13, s[6:7], 0, v13
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v0
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, -1, v14, s[6:7]
@@ -2125,11 +2176,11 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e32 v11, -1, v15, vcc
 ; GISEL-NEXT:    v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v1
 ; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v3, v0
 ; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v13, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
@@ -2140,15 +2191,15 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v12, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v18, v17, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v18, v17, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v20, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, v19, v21, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v19, v21, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v7, v2, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 12df4b7c7fc33d7..5f437d52879597c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -591,22 +591,22 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[6:7], 0, v3, s[16:17]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v5
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v19, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[10:11], 0, v9
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[8:9]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v12, v4, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v13, v6, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v13, v6, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v18, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[12:13]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[12:13]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v10, v1, s[10:11]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64:
@@ -1108,15 +1108,15 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
 ; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, s6, v6
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v7, s4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v10, s5, v5
-; GISEL-NEXT:    v_mul_hi_u32 v11, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, s4, v6
+; GISEL-NEXT:    v_mul_lo_u32 v9, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, s7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, s4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, s5, v5
+; GISEL-NEXT:    v_mul_hi_u32 v14, s4, v5
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v9
@@ -1165,19 +1165,19 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v5, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v6, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, s4, v9
-; GISEL-NEXT:    v_mul_lo_u32 v11, s5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v13, s4, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, s6, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, s7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s6, v9
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
 ; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v15, s4, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, s4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, s5, v5
+; GISEL-NEXT:    v_mul_hi_u32 v14, s4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v15, s6, v7
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v10
 ; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, s6, v6
+; GISEL-NEXT:    v_mul_lo_u32 v18, s4, v6
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
@@ -1223,22 +1223,22 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
 ; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v9
-; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v18, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v15, v2, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v17, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v18, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
@@ -1275,51 +1275,51 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v4
 ; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
-; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
-; GISEL-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v3, v7, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v2, v4
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v0, v12
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[8:9], v1, v5, s[6:7]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[8:9], v1, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[8:9]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; GISEL-NEXT:    s_mov_b64 s[4:5], vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
-; GISEL-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[8:9]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
-; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v12, s[4:5], v0, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
+; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, -1, v13, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v12, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v12, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v15, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
@@ -1397,97 +1397,97 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
 ; CGP-NEXT:    v_mul_hi_u32 v8, v0, v5
 ; CGP-NEXT:    v_mul_hi_u32 v9, v1, v5
 ; CGP-NEXT:    v_mul_lo_u32 v10, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v11, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
 ; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
-; CGP-NEXT:    v_mul_lo_u32 v12, v0, v6
-; CGP-NEXT:    v_mul_lo_u32 v13, v1, v6
-; CGP-NEXT:    v_mul_hi_u32 v14, v0, v6
-; CGP-NEXT:    v_mul_hi_u32 v15, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v16, v2, v6
-; CGP-NEXT:    v_mul_lo_u32 v17, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v18, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v15, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v16, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v17, v1, v6
+; CGP-NEXT:    v_mul_hi_u32 v18, v0, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v16
 ; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v17, v9
 ; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v14
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v18
 ; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v17, v9
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
 ; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
 ; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_mul_lo_u32 v9, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v7, v4, v7
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
 ; CGP-NEXT:    v_mul_lo_u32 v11, v5, v4
 ; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT:    v_mul_lo_u32 v8, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v4, v7
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, v10, v4
 ; CGP-NEXT:    v_mul_lo_u32 v6, v6, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v9
-; CGP-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v11
-; CGP-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; CGP-NEXT:    v_subb_u32_e64 v7, vcc, v3, v5, s[4:5]
 ; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
-; CGP-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
-; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v2, v4
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v0, v9
+; CGP-NEXT:    v_subb_u32_e64 v9, s[8:9], v1, v6, s[6:7]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[8:9], v1, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[8:9]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[8:9]
+; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v4
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
 ; CGP-NEXT:    s_mov_b64 s[4:5], vcc
-; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
-; CGP-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v8
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[8:9]
 ; CGP-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
 ; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
-; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v12, s[4:5], v0, v4
+; CGP-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
 ; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
-; CGP-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v13, -1, v13, vcc
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v12, v4, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v15, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[4:5]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
@@ -1674,65 +1674,65 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v9, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v10, 0
-; GISEL-NEXT:    v_lshl_b64 v[7:8], v[9:10], v4
-; GISEL-NEXT:    v_lshl_b64 v[4:5], v[9:10], v6
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v8
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v7
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v5
-; GISEL-NEXT:    v_sub_i32_e64 v9, s[4:5], 0, v4
-; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
-; GISEL-NEXT:    v_mac_f32_e32 v13, 0x4f800000, v14
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v10
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v11, v13
-; GISEL-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
-; GISEL-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
-; GISEL-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
-; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v11
-; GISEL-NEXT:    v_trunc_f32_e32 v13, v13
+; GISEL-NEXT:    v_mov_b32_e32 v7, 0x1000
+; GISEL-NEXT:    v_mov_b32_e32 v8, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], v[7:8], v4
+; GISEL-NEXT:    v_lshl_b64 v[6:7], v[7:8], v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v13, v7
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v15, v5
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_subb_u32_e64 v10, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v13
+; GISEL-NEXT:    v_mac_f32_e32 v14, 0x4f800000, v15
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v12, v12
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v13, v14
+; GISEL-NEXT:    v_mul_f32_e32 v12, 0x5f7ffffc, v12
+; GISEL-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
+; GISEL-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v12
+; GISEL-NEXT:    v_mul_f32_e32 v15, 0x2f800000, v13
 ; GISEL-NEXT:    v_trunc_f32_e32 v14, v14
-; GISEL-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
-; GISEL-NEXT:    v_mac_f32_e32 v11, 0xcf800000, v14
+; GISEL-NEXT:    v_trunc_f32_e32 v15, v15
+; GISEL-NEXT:    v_mac_f32_e32 v12, 0xcf800000, v14
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v14
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v13
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
-; GISEL-NEXT:    v_mul_lo_u32 v18, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v12, v11
-; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v11
+; GISEL-NEXT:    v_mac_f32_e32 v13, 0xcf800000, v15
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v15
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v14
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
+; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v15
+; GISEL-NEXT:    v_mul_lo_u32 v18, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v19, v10, v13
+; GISEL-NEXT:    v_mul_hi_u32 v20, v8, v13
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
-; GISEL-NEXT:    v_mul_lo_u32 v19, v14, v18
+; GISEL-NEXT:    v_mul_lo_u32 v19, v15, v18
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v17
+; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v17
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v18
+; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v18
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v19, v20
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v10
+; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v12
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v20, v16
-; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v20, v9, v12
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v21, v10, v16
+; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v19
+; GISEL-NEXT:    v_mul_lo_u32 v21, v12, v16
 ; GISEL-NEXT:    v_add_i32_e64 v20, s[6:7], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v21, v10, v19
+; GISEL-NEXT:    v_mul_hi_u32 v21, v12, v19
 ; GISEL-NEXT:    v_add_i32_e64 v20, s[8:9], v20, v21
-; GISEL-NEXT:    v_mul_hi_u32 v19, v13, v19
-; GISEL-NEXT:    v_mul_hi_u32 v18, v14, v18
-; GISEL-NEXT:    v_mul_lo_u32 v20, v13, v16
+; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v18, v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v16
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v17
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[12:13], v20, v18
-; GISEL-NEXT:    v_mul_hi_u32 v20, v10, v16
+; GISEL-NEXT:    v_mul_hi_u32 v20, v12, v16
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[14:15], v19, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v11, v17
+; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v17
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[16:17], v18, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[8:9]
@@ -1748,102 +1748,102 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_add_i32_e32 v23, vcc, v23, v24
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v18, v22
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v19
-; GISEL-NEXT:    v_mul_hi_u32 v16, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v17, v14, v17
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v18
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v19
+; GISEL-NEXT:    v_mul_hi_u32 v16, v14, v16
+; GISEL-NEXT:    v_mul_hi_u32 v17, v15, v17
+; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v13, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v21, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
-; GISEL-NEXT:    v_mul_lo_u32 v20, v6, v10
-; GISEL-NEXT:    v_mul_lo_u32 v15, v15, v10
+; GISEL-NEXT:    v_mul_lo_u32 v20, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v12
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v18, v9, v12
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
-; GISEL-NEXT:    v_mul_lo_u32 v19, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v11
-; GISEL-NEXT:    v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
-; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v11
-; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v17, v10, v20
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v13
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
-; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v13
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v13
+; GISEL-NEXT:    v_addc_u32_e64 v14, vcc, v14, v16, s[6:7]
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_addc_u32_e64 v15, vcc, v15, v17, s[8:9]
+; GISEL-NEXT:    v_mul_hi_u32 v17, v12, v20
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v12, v9
-; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v20
-; GISEL-NEXT:    v_mul_hi_u32 v20, v13, v20
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
-; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v14, v19
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v6
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v6
-; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v19
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, v14, v20
+; GISEL-NEXT:    v_mul_hi_u32 v20, v14, v20
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v18
+; GISEL-NEXT:    v_mul_lo_u32 v18, v15, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v15, v19
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v14, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v17
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v17, v13, v8
 ; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v20
-; GISEL-NEXT:    v_mul_lo_u32 v20, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v20, v15, v8
 ; GISEL-NEXT:    v_add_i32_e64 v17, s[8:9], v18, v17
-; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v9
+; GISEL-NEXT:    v_mul_hi_u32 v18, v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v15, v8
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[10:11], v20, v19
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v16, v12
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[10:11]
 ; GISEL-NEXT:    v_add_i32_e64 v18, s[8:9], v19, v18
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_add_i32_e64 v19, s[4:5], v20, v19
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v20
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[8:9]
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v20
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v19
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v19
 ; GISEL-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v17
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v3, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v2, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v11
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v13, v6, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
-; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v20, v3, v9
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v16, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v9
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v17
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v19
+; GISEL-NEXT:    v_mul_lo_u32 v16, v3, v10
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v3, v10
+; GISEL-NEXT:    v_mul_lo_u32 v18, v1, v12
+; GISEL-NEXT:    v_mul_hi_u32 v19, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v13
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v14, v9, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v15, v8, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v2, v9
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v9
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v8
+; GISEL-NEXT:    v_mul_lo_u32 v20, v1, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v10
 ; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v18, v15
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v20, v11
+; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v20, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
 ; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v13, v19
-; GISEL-NEXT:    v_add_i32_e64 v11, s[8:9], v11, v16
+; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v16
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
@@ -1851,81 +1851,81 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
 ; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
 ; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v20, v16
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v17, v8, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v4, v11
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v17, v7, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
+; GISEL-NEXT:    v_mul_lo_u32 v18, v4, v12
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v12
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v16
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v18
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v13
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v4
-; GISEL-NEXT:    v_sub_i32_e64 v12, s[10:11], v0, v7
-; GISEL-NEXT:    v_sub_i32_e64 v13, s[12:13], v2, v4
-; GISEL-NEXT:    v_mul_lo_u32 v6, v7, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v16
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v18
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v4
+; GISEL-NEXT:    v_sub_i32_e64 v11, s[10:11], v2, v6
+; GISEL-NEXT:    v_sub_i32_e64 v13, s[12:13], v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[8:9]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v11, v6
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v13, v4
-; GISEL-NEXT:    v_sub_i32_e64 v7, s[14:15], v12, v7
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[14:15], v11, v6
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[16:17], v13, v4
-; GISEL-NEXT:    v_add_i32_e64 v6, s[18:19], v17, v6
-; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v19, v9
+; GISEL-NEXT:    v_add_i32_e64 v9, s[18:19], v17, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[18:19], v19, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v11
-; GISEL-NEXT:    v_subb_u32_e64 v10, s[6:7], v1, v6, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v6
-; GISEL-NEXT:    v_subb_u32_e64 v6, s[6:7], v3, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v12
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[6:7], v3, v9, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v3, s[6:7], v3, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v8
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v5
-; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v8
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11]
-; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v8, s[10:11]
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e64 v14, vcc, 0, v3, s[12:13]
-; GISEL-NEXT:    v_subb_u32_e64 v3, vcc, v3, v5, s[12:13]
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s[8:9]
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v18, v8
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], v1, v8, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v7
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v18, vcc, 0, v3, s[10:11]
+; GISEL-NEXT:    v_subb_u32_e64 v3, vcc, v3, v7, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e64 v14, vcc, 0, v1, s[12:13]
+; GISEL-NEXT:    v_subb_u32_e64 v1, vcc, v1, v5, s[12:13]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[8:9]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v18, v7
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[14:15]
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v5
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[6:7], 0, v3, s[16:17]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v8
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[16:17]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v7
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], v14, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[10:11], 0, v8
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[12:13], 0, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v17, s[8:9]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v12, v7, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v13, v4, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v1, s[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v14, v3, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v6, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[8:9]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v11, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v13, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v18, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v14, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[12:13]
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v10, v3, s[10:11]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[12:13]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
@@ -2324,56 +2324,56 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v0
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
-; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
-; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v6
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v1
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
-; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v4
+; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v6
+; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v0
 ; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
 ; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
-; GISEL-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
-; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v6
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v8
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
-; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v7
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
 ; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
 ; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
-; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v11
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v6
-; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v7
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
-; GISEL-NEXT:    v_mul_hi_u32 v20, v6, v14
+; GISEL-NEXT:    v_mul_hi_u32 v20, v4, v14
 ; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v11, v17
-; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v17
+; GISEL-NEXT:    v_mul_hi_u32 v21, v5, v17
 ; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
-; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v12
+; GISEL-NEXT:    v_mul_lo_u32 v16, v4, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v19, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v22, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v22, v4, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
-; GISEL-NEXT:    v_mul_lo_u32 v23, v7, v13
+; GISEL-NEXT:    v_mul_lo_u32 v23, v5, v13
 ; GISEL-NEXT:    v_mul_lo_u32 v24, v11, v13
-; GISEL-NEXT:    v_mul_hi_u32 v25, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v25, v5, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
 ; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
@@ -2403,36 +2403,36 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
 ; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
 ; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v13, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v7
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v8
 ; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v17, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v12
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v11
 ; GISEL-NEXT:    v_mul_lo_u32 v18, v11, v13
-; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
 ; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v9
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
-; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v8, v4
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v5
-; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v5
-; GISEL-NEXT:    v_mul_hi_u32 v21, v7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v6
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v20, v11, v7
+; GISEL-NEXT:    v_mul_hi_u32 v21, v5, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v7
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
@@ -2459,75 +2459,75 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v14
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v12, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v4
 ; GISEL-NEXT:    v_mul_hi_u32 v4, 0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v11, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
 ; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v6
-; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_mul_lo_u32 v4, v1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v0, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v6, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v0, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
 ; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v4, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], 0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v11
 ; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
 ; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], 0, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v0
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[6:7]
@@ -2535,11 +2535,11 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, -1, v9, vcc
 ; GISEL-NEXT:    v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v2, v1
 ; GISEL-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v2, v0
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v3, v0
 ; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
@@ -2554,15 +2554,15 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, v12, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v12, v0, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v15, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v15, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v3, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v8, v1, s[4:5]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_urem_v2i64_24bit:
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index cb89841b58f9787..1dab3e3863bcca7 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -1452,73 +1452,73 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) {
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s4, 0
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s5, s4
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_lshr_b32 s5, s0, 16
-; SI-NEXT:    s_lshr_b32 s6, s0, 24
-; SI-NEXT:    s_lshr_b32 s8, s1, 16
-; SI-NEXT:    s_lshr_b32 s9, s1, 24
-; SI-NEXT:    s_lshr_b32 s10, s2, 16
-; SI-NEXT:    s_lshr_b32 s11, s2, 24
-; SI-NEXT:    s_lshr_b32 s12, s3, 16
-; SI-NEXT:    s_lshr_b32 s13, s3, 24
-; SI-NEXT:    s_bfe_u32 s14, s0, 0x80008
-; SI-NEXT:    s_bfe_u32 s15, s1, 0x80008
-; SI-NEXT:    s_bfe_u32 s16, s2, 0x80008
-; SI-NEXT:    s_bfe_u32 s17, s3, 0x80008
+; SI-NEXT:    s_lshr_b32 s8, s0, 16
+; SI-NEXT:    s_lshr_b32 s9, s0, 24
+; SI-NEXT:    s_lshr_b32 s10, s1, 16
+; SI-NEXT:    s_lshr_b32 s11, s1, 24
+; SI-NEXT:    s_lshr_b32 s12, s2, 16
+; SI-NEXT:    s_lshr_b32 s13, s2, 24
+; SI-NEXT:    s_lshr_b32 s14, s3, 16
+; SI-NEXT:    s_lshr_b32 s15, s3, 24
+; SI-NEXT:    s_bfe_u32 s16, s0, 0x80008
+; SI-NEXT:    s_bfe_u32 s17, s1, 0x80008
+; SI-NEXT:    s_bfe_u32 s18, s2, 0x80008
+; SI-NEXT:    s_bfe_u32 s19, s3, 0x80008
 ; SI-NEXT:    s_add_i32 s3, s3, s3
 ; SI-NEXT:    s_add_i32 s2, s2, s2
-; SI-NEXT:    s_add_i32 s1, s1, s1
-; SI-NEXT:    s_add_i32 s0, s0, s0
-; SI-NEXT:    s_add_i32 s13, s13, s13
-; SI-NEXT:    s_add_i32 s12, s12, s12
-; SI-NEXT:    s_and_b32 s3, s3, 0xff
-; SI-NEXT:    s_add_i32 s17, s17, s17
 ; SI-NEXT:    s_add_i32 s11, s11, s11
 ; SI-NEXT:    s_add_i32 s10, s10, s10
-; SI-NEXT:    s_and_b32 s2, s2, 0xff
-; SI-NEXT:    s_add_i32 s16, s16, s16
+; SI-NEXT:    s_add_i32 s1, s1, s1
+; SI-NEXT:    s_add_i32 s17, s17, s17
 ; SI-NEXT:    s_add_i32 s9, s9, s9
 ; SI-NEXT:    s_add_i32 s8, s8, s8
-; SI-NEXT:    s_and_b32 s1, s1, 0xff
+; SI-NEXT:    s_add_i32 s0, s0, s0
+; SI-NEXT:    s_add_i32 s16, s16, s16
 ; SI-NEXT:    s_add_i32 s15, s15, s15
-; SI-NEXT:    s_add_i32 s6, s6, s6
-; SI-NEXT:    s_add_i32 s5, s5, s5
-; SI-NEXT:    s_and_b32 s0, s0, 0xff
 ; SI-NEXT:    s_add_i32 s14, s14, s14
-; SI-NEXT:    s_lshl_b32 s13, s13, 24
-; SI-NEXT:    s_and_b32 s12, s12, 0xff
-; SI-NEXT:    s_lshl_b32 s17, s17, 8
+; SI-NEXT:    s_and_b32 s3, s3, 0xff
+; SI-NEXT:    s_add_i32 s19, s19, s19
+; SI-NEXT:    s_add_i32 s13, s13, s13
+; SI-NEXT:    s_add_i32 s12, s12, s12
+; SI-NEXT:    s_and_b32 s2, s2, 0xff
+; SI-NEXT:    s_add_i32 s18, s18, s18
 ; SI-NEXT:    s_lshl_b32 s11, s11, 24
 ; SI-NEXT:    s_and_b32 s10, s10, 0xff
-; SI-NEXT:    s_lshl_b32 s16, s16, 8
+; SI-NEXT:    s_and_b32 s1, s1, 0xff
+; SI-NEXT:    s_lshl_b32 s17, s17, 8
 ; SI-NEXT:    s_lshl_b32 s9, s9, 24
 ; SI-NEXT:    s_and_b32 s8, s8, 0xff
-; SI-NEXT:    s_lshl_b32 s15, s15, 8
-; SI-NEXT:    s_lshl_b32 s6, s6, 24
-; SI-NEXT:    s_and_b32 s5, s5, 0xff
-; SI-NEXT:    s_lshl_b32 s14, s14, 8
-; SI-NEXT:    s_lshl_b32 s12, s12, 16
-; SI-NEXT:    s_or_b32 s3, s3, s17
+; SI-NEXT:    s_and_b32 s0, s0, 0xff
+; SI-NEXT:    s_lshl_b32 s16, s16, 8
+; SI-NEXT:    s_lshl_b32 s15, s15, 24
+; SI-NEXT:    s_and_b32 s14, s14, 0xff
+; SI-NEXT:    s_lshl_b32 s19, s19, 8
+; SI-NEXT:    s_lshl_b32 s13, s13, 24
+; SI-NEXT:    s_and_b32 s12, s12, 0xff
+; SI-NEXT:    s_lshl_b32 s18, s18, 8
 ; SI-NEXT:    s_lshl_b32 s10, s10, 16
-; SI-NEXT:    s_or_b32 s2, s2, s16
+; SI-NEXT:    s_or_b32 s1, s1, s17
 ; SI-NEXT:    s_lshl_b32 s8, s8, 16
-; SI-NEXT:    s_or_b32 s1, s1, s15
-; SI-NEXT:    s_lshl_b32 s5, s5, 16
-; SI-NEXT:    s_or_b32 s0, s0, s14
-; SI-NEXT:    s_or_b32 s12, s13, s12
-; SI-NEXT:    s_and_b32 s3, s3, 0xffff
+; SI-NEXT:    s_or_b32 s0, s0, s16
+; SI-NEXT:    s_lshl_b32 s14, s14, 16
+; SI-NEXT:    s_or_b32 s3, s3, s19
+; SI-NEXT:    s_lshl_b32 s12, s12, 16
+; SI-NEXT:    s_or_b32 s2, s2, s18
 ; SI-NEXT:    s_or_b32 s10, s11, s10
-; SI-NEXT:    s_and_b32 s2, s2, 0xffff
-; SI-NEXT:    s_or_b32 s8, s9, s8
 ; SI-NEXT:    s_and_b32 s1, s1, 0xffff
-; SI-NEXT:    s_or_b32 s5, s6, s5
+; SI-NEXT:    s_or_b32 s8, s9, s8
 ; SI-NEXT:    s_and_b32 s0, s0, 0xffff
-; SI-NEXT:    s_or_b32 s3, s3, s12
-; SI-NEXT:    s_or_b32 s2, s2, s10
-; SI-NEXT:    s_or_b32 s1, s1, s8
-; SI-NEXT:    s_or_b32 s0, s0, s5
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    s_mov_b32 s5, s4
+; SI-NEXT:    s_or_b32 s9, s15, s14
+; SI-NEXT:    s_and_b32 s3, s3, 0xffff
+; SI-NEXT:    s_or_b32 s11, s13, s12
+; SI-NEXT:    s_and_b32 s2, s2, 0xffff
+; SI-NEXT:    s_or_b32 s1, s1, s10
+; SI-NEXT:    s_or_b32 s0, s0, s8
+; SI-NEXT:    s_or_b32 s3, s3, s9
+; SI-NEXT:    s_or_b32 s2, s2, s11
 ; SI-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-NEXT:    v_mov_b32_e32 v1, s1
 ; SI-NEXT:    v_mov_b32_e32 v2, s2
@@ -1671,151 +1671,150 @@ define amdgpu_kernel void @amd_kernel_v32i8(<32 x i8> %arg0) {
 ; SI-LABEL: amd_kernel_v32i8:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0x9
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_add_i32 s12, s6, s6
+; SI-NEXT:    s_add_i32 s13, s5, s5
+; SI-NEXT:    s_add_i32 s14, s4, s4
 ; SI-NEXT:    s_mov_b32 s9, 0
 ; SI-NEXT:    s_mov_b32 s8, 16
 ; SI-NEXT:    s_mov_b32 s11, 0xf000
 ; SI-NEXT:    s_mov_b32 s10, -1
-; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_lshr_b32 s12, s4, 16
-; SI-NEXT:    s_lshr_b32 s13, s4, 24
-; SI-NEXT:    s_lshr_b32 s14, s5, 16
-; SI-NEXT:    s_lshr_b32 s15, s5, 24
-; SI-NEXT:    s_lshr_b32 s16, s6, 16
-; SI-NEXT:    s_lshr_b32 s17, s6, 24
-; SI-NEXT:    s_lshr_b32 s18, s7, 16
-; SI-NEXT:    s_lshr_b32 s19, s7, 24
-; SI-NEXT:    s_lshr_b32 s20, s0, 16
-; SI-NEXT:    s_lshr_b32 s21, s0, 24
-; SI-NEXT:    s_lshr_b32 s22, s1, 16
-; SI-NEXT:    s_lshr_b32 s23, s1, 24
-; SI-NEXT:    s_lshr_b32 s24, s2, 16
-; SI-NEXT:    s_lshr_b32 s25, s2, 24
-; SI-NEXT:    s_lshr_b32 s26, s3, 16
-; SI-NEXT:    s_lshr_b32 s27, s3, 24
-; SI-NEXT:    s_bfe_u32 s28, s4, 0x80008
-; SI-NEXT:    s_bfe_u32 s29, s5, 0x80008
-; SI-NEXT:    s_bfe_u32 s30, s6, 0x80008
+; SI-NEXT:    s_and_b32 s12, s12, 0xff
+; SI-NEXT:    s_and_b32 s13, s13, 0xff
+; SI-NEXT:    s_and_b32 s14, s14, 0xff
+; SI-NEXT:    s_lshr_b32 s15, s4, 16
+; SI-NEXT:    s_lshr_b32 s16, s4, 24
+; SI-NEXT:    s_lshr_b32 s17, s5, 16
+; SI-NEXT:    s_lshr_b32 s18, s5, 24
+; SI-NEXT:    s_lshr_b32 s19, s6, 16
+; SI-NEXT:    s_lshr_b32 s20, s6, 24
+; SI-NEXT:    s_lshr_b32 s21, s7, 16
+; SI-NEXT:    s_lshr_b32 s22, s7, 24
+; SI-NEXT:    s_lshr_b32 s23, s0, 16
+; SI-NEXT:    s_lshr_b32 s24, s0, 24
+; SI-NEXT:    s_lshr_b32 s25, s1, 16
+; SI-NEXT:    s_lshr_b32 s26, s1, 24
+; SI-NEXT:    s_lshr_b32 s27, s2, 16
+; SI-NEXT:    s_lshr_b32 s28, s2, 24
+; SI-NEXT:    s_lshr_b32 s29, s3, 16
+; SI-NEXT:    s_lshr_b32 s30, s3, 24
+; SI-NEXT:    s_bfe_u32 s4, s4, 0x80008
+; SI-NEXT:    s_bfe_u32 s5, s5, 0x80008
+; SI-NEXT:    s_bfe_u32 s6, s6, 0x80008
 ; SI-NEXT:    s_bfe_u32 s31, s7, 0x80008
 ; SI-NEXT:    s_bfe_u32 s33, s0, 0x80008
 ; SI-NEXT:    s_bfe_u32 s34, s1, 0x80008
 ; SI-NEXT:    s_bfe_u32 s35, s2, 0x80008
 ; SI-NEXT:    s_bfe_u32 s36, s3, 0x80008
 ; SI-NEXT:    s_add_i32 s3, s3, s3
-; SI-NEXT:    s_add_i32 s2, s2, s2
-; SI-NEXT:    s_add_i32 s1, s1, s1
-; SI-NEXT:    s_add_i32 s0, s0, s0
-; SI-NEXT:    s_add_i32 s7, s7, s7
-; SI-NEXT:    s_add_i32 s6, s6, s6
-; SI-NEXT:    s_add_i32 s5, s5, s5
-; SI-NEXT:    s_add_i32 s4, s4, s4
+; SI-NEXT:    s_add_i32 s28, s28, s28
 ; SI-NEXT:    s_add_i32 s27, s27, s27
+; SI-NEXT:    s_add_i32 s2, s2, s2
 ; SI-NEXT:    s_add_i32 s26, s26, s26
-; SI-NEXT:    s_and_b32 s3, s3, 0xff
-; SI-NEXT:    s_add_i32 s36, s36, s36
 ; SI-NEXT:    s_add_i32 s25, s25, s25
+; SI-NEXT:    s_add_i32 s1, s1, s1
+; SI-NEXT:    s_add_i32 s34, s34, s34
 ; SI-NEXT:    s_add_i32 s24, s24, s24
-; SI-NEXT:    s_and_b32 s2, s2, 0xff
-; SI-NEXT:    s_add_i32 s35, s35, s35
 ; SI-NEXT:    s_add_i32 s23, s23, s23
+; SI-NEXT:    s_add_i32 s0, s0, s0
+; SI-NEXT:    s_add_i32 s33, s33, s33
 ; SI-NEXT:    s_add_i32 s22, s22, s22
-; SI-NEXT:    s_and_b32 s1, s1, 0xff
-; SI-NEXT:    s_add_i32 s34, s34, s34
 ; SI-NEXT:    s_add_i32 s21, s21, s21
+; SI-NEXT:    s_add_i32 s7, s7, s7
+; SI-NEXT:    s_add_i32 s31, s31, s31
 ; SI-NEXT:    s_add_i32 s20, s20, s20
-; SI-NEXT:    s_and_b32 s0, s0, 0xff
-; SI-NEXT:    s_add_i32 s33, s33, s33
 ; SI-NEXT:    s_add_i32 s19, s19, s19
+; SI-NEXT:    s_add_i32 s6, s6, s6
 ; SI-NEXT:    s_add_i32 s18, s18, s18
-; SI-NEXT:    s_and_b32 s7, s7, 0xff
-; SI-NEXT:    s_add_i32 s31, s31, s31
 ; SI-NEXT:    s_add_i32 s17, s17, s17
+; SI-NEXT:    s_add_i32 s5, s5, s5
 ; SI-NEXT:    s_add_i32 s16, s16, s16
-; SI-NEXT:    s_and_b32 s6, s6, 0xff
-; SI-NEXT:    s_add_i32 s30, s30, s30
 ; SI-NEXT:    s_add_i32 s15, s15, s15
-; SI-NEXT:    s_add_i32 s14, s14, s14
-; SI-NEXT:    s_and_b32 s5, s5, 0xff
+; SI-NEXT:    s_add_i32 s4, s4, s4
+; SI-NEXT:    s_add_i32 s30, s30, s30
 ; SI-NEXT:    s_add_i32 s29, s29, s29
-; SI-NEXT:    s_add_i32 s13, s13, s13
-; SI-NEXT:    s_add_i32 s12, s12, s12
-; SI-NEXT:    s_and_b32 s4, s4, 0xff
-; SI-NEXT:    s_add_i32 s28, s28, s28
-; SI-NEXT:    s_lshl_b32 s27, s27, 24
-; SI-NEXT:    s_and_b32 s26, s26, 0xff
-; SI-NEXT:    s_lshl_b32 s36, s36, 8
-; SI-NEXT:    s_lshl_b32 s25, s25, 24
-; SI-NEXT:    s_and_b32 s24, s24, 0xff
-; SI-NEXT:    s_lshl_b32 s35, s35, 8
-; SI-NEXT:    s_lshl_b32 s23, s23, 24
-; SI-NEXT:    s_and_b32 s22, s22, 0xff
+; SI-NEXT:    s_and_b32 s3, s3, 0xff
+; SI-NEXT:    s_add_i32 s36, s36, s36
+; SI-NEXT:    s_lshl_b32 s28, s28, 24
+; SI-NEXT:    s_and_b32 s27, s27, 0xff
+; SI-NEXT:    s_and_b32 s2, s2, 0xff
+; SI-NEXT:    s_add_i32 s35, s35, s35
+; SI-NEXT:    s_lshl_b32 s26, s26, 24
+; SI-NEXT:    s_and_b32 s25, s25, 0xff
+; SI-NEXT:    s_and_b32 s1, s1, 0xff
 ; SI-NEXT:    s_lshl_b32 s34, s34, 8
-; SI-NEXT:    s_lshl_b32 s21, s21, 24
-; SI-NEXT:    s_and_b32 s20, s20, 0xff
+; SI-NEXT:    s_lshl_b32 s24, s24, 24
+; SI-NEXT:    s_and_b32 s23, s23, 0xff
+; SI-NEXT:    s_and_b32 s0, s0, 0xff
 ; SI-NEXT:    s_lshl_b32 s33, s33, 8
-; SI-NEXT:    s_lshl_b32 s19, s19, 24
-; SI-NEXT:    s_and_b32 s18, s18, 0xff
+; SI-NEXT:    s_lshl_b32 s22, s22, 24
+; SI-NEXT:    s_and_b32 s21, s21, 0xff
+; SI-NEXT:    s_and_b32 s7, s7, 0xff
 ; SI-NEXT:    s_lshl_b32 s31, s31, 8
-; SI-NEXT:    s_lshl_b32 s17, s17, 24
-; SI-NEXT:    s_and_b32 s16, s16, 0xff
-; SI-NEXT:    s_lshl_b32 s30, s30, 8
-; SI-NEXT:    s_lshl_b32 s15, s15, 24
-; SI-NEXT:    s_and_b32 s14, s14, 0xff
-; SI-NEXT:    s_lshl_b32 s29, s29, 8
-; SI-NEXT:    s_lshl_b32 s13, s13, 24
-; SI-NEXT:    s_and_b32 s12, s12, 0xff
-; SI-NEXT:    s_lshl_b32 s28, s28, 8
-; SI-NEXT:    s_lshl_b32 s26, s26, 16
-; SI-NEXT:    s_or_b32 s3, s3, s36
-; SI-NEXT:    s_lshl_b32 s24, s24, 16
-; SI-NEXT:    s_or_b32 s2, s2, s35
-; SI-NEXT:    s_lshl_b32 s22, s22, 16
+; SI-NEXT:    s_lshl_b32 s20, s20, 24
+; SI-NEXT:    s_and_b32 s19, s19, 0xff
+; SI-NEXT:    s_lshl_b32 s6, s6, 8
+; SI-NEXT:    s_lshl_b32 s18, s18, 24
+; SI-NEXT:    s_and_b32 s17, s17, 0xff
+; SI-NEXT:    s_lshl_b32 s5, s5, 8
+; SI-NEXT:    s_lshl_b32 s16, s16, 24
+; SI-NEXT:    s_and_b32 s15, s15, 0xff
+; SI-NEXT:    s_lshl_b32 s4, s4, 8
+; SI-NEXT:    s_lshl_b32 s30, s30, 24
+; SI-NEXT:    s_and_b32 s29, s29, 0xff
+; SI-NEXT:    s_lshl_b32 s36, s36, 8
+; SI-NEXT:    s_lshl_b32 s27, s27, 16
+; SI-NEXT:    s_lshl_b32 s35, s35, 8
+; SI-NEXT:    s_lshl_b32 s25, s25, 16
 ; SI-NEXT:    s_or_b32 s1, s1, s34
-; SI-NEXT:    s_lshl_b32 s20, s20, 16
+; SI-NEXT:    s_lshl_b32 s23, s23, 16
 ; SI-NEXT:    s_or_b32 s0, s0, s33
-; SI-NEXT:    s_lshl_b32 s18, s18, 16
+; SI-NEXT:    s_lshl_b32 s21, s21, 16
 ; SI-NEXT:    s_or_b32 s7, s7, s31
-; SI-NEXT:    s_lshl_b32 s16, s16, 16
-; SI-NEXT:    s_or_b32 s6, s6, s30
-; SI-NEXT:    s_lshl_b32 s14, s14, 16
-; SI-NEXT:    s_or_b32 s5, s5, s29
-; SI-NEXT:    s_lshl_b32 s12, s12, 16
-; SI-NEXT:    s_or_b32 s4, s4, s28
-; SI-NEXT:    s_or_b32 s26, s27, s26
-; SI-NEXT:    s_and_b32 s3, s3, 0xffff
-; SI-NEXT:    s_or_b32 s24, s25, s24
-; SI-NEXT:    s_and_b32 s2, s2, 0xffff
-; SI-NEXT:    s_or_b32 s22, s23, s22
+; SI-NEXT:    s_lshl_b32 s19, s19, 16
+; SI-NEXT:    s_or_b32 s6, s12, s6
+; SI-NEXT:    s_lshl_b32 s12, s17, 16
+; SI-NEXT:    s_or_b32 s5, s13, s5
+; SI-NEXT:    s_lshl_b32 s13, s15, 16
+; SI-NEXT:    s_or_b32 s4, s14, s4
+; SI-NEXT:    s_lshl_b32 s14, s29, 16
+; SI-NEXT:    s_or_b32 s3, s3, s36
+; SI-NEXT:    s_or_b32 s15, s28, s27
+; SI-NEXT:    s_or_b32 s2, s2, s35
+; SI-NEXT:    s_or_b32 s17, s26, s25
 ; SI-NEXT:    s_and_b32 s1, s1, 0xffff
-; SI-NEXT:    s_or_b32 s20, s21, s20
+; SI-NEXT:    s_or_b32 s23, s24, s23
 ; SI-NEXT:    s_and_b32 s0, s0, 0xffff
-; SI-NEXT:    s_or_b32 s18, s19, s18
+; SI-NEXT:    s_or_b32 s21, s22, s21
 ; SI-NEXT:    s_and_b32 s7, s7, 0xffff
-; SI-NEXT:    s_or_b32 s16, s17, s16
+; SI-NEXT:    s_or_b32 s19, s20, s19
 ; SI-NEXT:    s_and_b32 s6, s6, 0xffff
-; SI-NEXT:    s_or_b32 s14, s15, s14
+; SI-NEXT:    s_or_b32 s12, s18, s12
 ; SI-NEXT:    s_and_b32 s5, s5, 0xffff
-; SI-NEXT:    s_or_b32 s12, s13, s12
+; SI-NEXT:    s_or_b32 s13, s16, s13
 ; SI-NEXT:    s_and_b32 s4, s4, 0xffff
-; SI-NEXT:    s_or_b32 s3, s3, s26
-; SI-NEXT:    s_or_b32 s2, s2, s24
-; SI-NEXT:    s_or_b32 s1, s1, s22
-; SI-NEXT:    s_or_b32 s7, s7, s18
-; SI-NEXT:    s_or_b32 s6, s6, s16
-; SI-NEXT:    s_or_b32 s5, s5, s14
-; SI-NEXT:    s_or_b32 s4, s4, s12
-; SI-NEXT:    s_or_b32 s0, s0, s20
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    s_or_b32 s14, s30, s14
+; SI-NEXT:    s_and_b32 s3, s3, 0xffff
+; SI-NEXT:    s_and_b32 s2, s2, 0xffff
+; SI-NEXT:    s_or_b32 s1, s1, s17
+; SI-NEXT:    s_or_b32 s0, s0, s23
+; SI-NEXT:    s_or_b32 s7, s7, s21
+; SI-NEXT:    s_or_b32 s6, s6, s19
+; SI-NEXT:    s_or_b32 s5, s5, s12
+; SI-NEXT:    s_or_b32 s4, s4, s13
 ; SI-NEXT:    v_mov_b32_e32 v2, s6
 ; SI-NEXT:    v_mov_b32_e32 v3, s7
+; SI-NEXT:    v_mov_b32_e32 v4, s0
+; SI-NEXT:    v_mov_b32_e32 v5, s1
+; SI-NEXT:    s_or_b32 s0, s3, s14
+; SI-NEXT:    s_or_b32 s1, s2, s15
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
-; SI-NEXT:    s_waitcnt expcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s0
-; SI-NEXT:    v_mov_b32_e32 v1, s1
-; SI-NEXT:    v_mov_b32_e32 v2, s2
-; SI-NEXT:    v_mov_b32_e32 v3, s3
+; SI-NEXT:    v_mov_b32_e32 v6, s1
+; SI-NEXT:    v_mov_b32_e32 v7, s0
 ; SI-NEXT:    s_mov_b32 s8, s9
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: amd_kernel_v32i8:
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index e874ee56f594cae..927bc2171e9f714 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -557,13 +557,13 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
+; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
-; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
 ; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -602,13 +602,13 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
+; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
 ; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
 ; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
 ; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
 ; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
 ; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
-; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
-; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
 ; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index 0723290bdf734dd..278960e2114d580 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -558,13 +558,13 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v5, v13, v5
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v6, v14, v6
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, v8, v0
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v1, v9, v1
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v2, v10, v2
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v3, v11, v3
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v4, v12, v4
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v5, v13, v5
-; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v6, v14, v6
 ; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v7, v15, v7
 ; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -603,13 +603,13 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
 ; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NNAN-NEXT:    v_min_f32_e32 v5, v5, v13
+; SI-NNAN-NEXT:    v_min_f32_e32 v6, v6, v14
 ; SI-NNAN-NEXT:    v_min_f32_e32 v0, v0, v8
 ; SI-NNAN-NEXT:    v_min_f32_e32 v1, v1, v9
 ; SI-NNAN-NEXT:    v_min_f32_e32 v2, v2, v10
 ; SI-NNAN-NEXT:    v_min_f32_e32 v3, v3, v11
 ; SI-NNAN-NEXT:    v_min_f32_e32 v4, v4, v12
-; SI-NNAN-NEXT:    v_min_f32_e32 v5, v5, v13
-; SI-NNAN-NEXT:    v_min_f32_e32 v6, v6, v14
 ; SI-NNAN-NEXT:    v_min_f32_e32 v7, v7, v15
 ; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 2e7fa86e8ab8b6c..d31635848cbe0cd 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -1646,56 +1646,54 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_u32 s31, s4, 0x10016
 ; GFX6-NEXT:    s_bfe_u32 s33, s4, 0x10014
 ; GFX6-NEXT:    s_bfe_u32 s34, s4, 0x1001a
-; GFX6-NEXT:    s_bfe_u32 s35, s4, 0x1001e
-; GFX6-NEXT:    s_bfe_u32 s36, s4, 0x1001c
-; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x10018
-; GFX6-NEXT:    v_mov_b32_e32 v0, s36
-; GFX6-NEXT:    v_mov_b32_e32 v1, s20
-; GFX6-NEXT:    v_mov_b32_e32 v2, s35
-; GFX6-NEXT:    v_mov_b32_e32 v3, s19
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    s_bfe_u32 s35, s4, 0x10018
+; GFX6-NEXT:    s_bfe_u32 s36, s4, 0x1001e
+; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x1001c
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s18
-; GFX6-NEXT:    v_mov_b32_e32 v2, s34
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s33
-; GFX6-NEXT:    v_mov_b32_e32 v1, s16
-; GFX6-NEXT:    v_mov_b32_e32 v2, s31
-; GFX6-NEXT:    v_mov_b32_e32 v3, s15
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    v_mov_b32_e32 v4, s16
+; GFX6-NEXT:    v_mov_b32_e32 v6, s15
+; GFX6-NEXT:    v_mov_b32_e32 v7, s14
+; GFX6-NEXT:    v_mov_b32_e32 v8, s29
+; GFX6-NEXT:    v_mov_b32_e32 v9, s13
+; GFX6-NEXT:    v_mov_b32_e32 v10, s28
+; GFX6-NEXT:    v_mov_b32_e32 v11, s12
+; GFX6-NEXT:    v_mov_b32_e32 v12, s27
+; GFX6-NEXT:    v_mov_b32_e32 v13, s11
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s30
-; GFX6-NEXT:    v_mov_b32_e32 v1, s14
-; GFX6-NEXT:    v_mov_b32_e32 v2, s29
-; GFX6-NEXT:    v_mov_b32_e32 v3, s13
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    v_mov_b32_e32 v10, s26
+; GFX6-NEXT:    v_mov_b32_e32 v11, s10
+; GFX6-NEXT:    v_mov_b32_e32 v12, s25
+; GFX6-NEXT:    v_mov_b32_e32 v13, s9
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s28
-; GFX6-NEXT:    v_mov_b32_e32 v1, s12
-; GFX6-NEXT:    v_mov_b32_e32 v2, s27
-; GFX6-NEXT:    v_mov_b32_e32 v3, s11
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    v_mov_b32_e32 v10, s24
+; GFX6-NEXT:    v_mov_b32_e32 v11, s8
+; GFX6-NEXT:    v_mov_b32_e32 v12, s23
+; GFX6-NEXT:    v_mov_b32_e32 v13, s7
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s26
-; GFX6-NEXT:    v_mov_b32_e32 v1, s10
-; GFX6-NEXT:    v_mov_b32_e32 v2, s25
-; GFX6-NEXT:    v_mov_b32_e32 v3, s9
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    v_mov_b32_e32 v10, s21
+; GFX6-NEXT:    v_mov_b32_e32 v11, s6
+; GFX6-NEXT:    v_mov_b32_e32 v12, s22
+; GFX6-NEXT:    v_mov_b32_e32 v13, s5
+; GFX6-NEXT:    v_mov_b32_e32 v14, s4
+; GFX6-NEXT:    v_mov_b32_e32 v15, s20
+; GFX6-NEXT:    v_mov_b32_e32 v16, s36
+; GFX6-NEXT:    v_mov_b32_e32 v17, s19
+; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    v_mov_b32_e32 v0, s35
+; GFX6-NEXT:    v_mov_b32_e32 v2, s34
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s24
-; GFX6-NEXT:    v_mov_b32_e32 v1, s8
-; GFX6-NEXT:    v_mov_b32_e32 v2, s23
-; GFX6-NEXT:    v_mov_b32_e32 v3, s7
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v3, s33
+; GFX6-NEXT:    v_mov_b32_e32 v5, s31
+; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s21
-; GFX6-NEXT:    v_mov_b32_e32 v1, s6
-; GFX6-NEXT:    v_mov_b32_e32 v2, s22
-; GFX6-NEXT:    v_mov_b32_e32 v3, s5
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NEXT:    v_mov_b32_e32 v6, s30
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_zextload_v32i1_to_v32i32:
@@ -1956,58 +1954,56 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_i32 s29, s4, 0x1001b
 ; GFX6-NEXT:    s_bfe_i32 s30, s4, 0x1001a
 ; GFX6-NEXT:    s_bfe_i32 s31, s4, 0x10019
-; GFX6-NEXT:    s_ashr_i32 s33, s4, 31
-; GFX6-NEXT:    s_bfe_i32 s34, s4, 0x1001e
-; GFX6-NEXT:    s_bfe_i32 s35, s4, 0x1001d
-; GFX6-NEXT:    s_bfe_i32 s36, s4, 0x1001c
-; GFX6-NEXT:    s_bfe_i32 s4, s4, 0x10018
-; GFX6-NEXT:    v_mov_b32_e32 v0, s36
-; GFX6-NEXT:    v_mov_b32_e32 v1, s35
-; GFX6-NEXT:    v_mov_b32_e32 v2, s34
-; GFX6-NEXT:    v_mov_b32_e32 v3, s33
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s4
-; GFX6-NEXT:    v_mov_b32_e32 v1, s31
-; GFX6-NEXT:    v_mov_b32_e32 v2, s30
-; GFX6-NEXT:    v_mov_b32_e32 v3, s29
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s28
-; GFX6-NEXT:    v_mov_b32_e32 v1, s27
-; GFX6-NEXT:    v_mov_b32_e32 v2, s26
+; GFX6-NEXT:    s_bfe_i32 s33, s4, 0x10018
+; GFX6-NEXT:    s_ashr_i32 s34, s4, 31
+; GFX6-NEXT:    s_bfe_i32 s35, s4, 0x1001e
+; GFX6-NEXT:    s_bfe_i32 s36, s4, 0x1001d
+; GFX6-NEXT:    s_bfe_i32 s4, s4, 0x1001c
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s25
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    v_mov_b32_e32 v4, s24
+; GFX6-NEXT:    v_mov_b32_e32 v5, s23
+; GFX6-NEXT:    v_mov_b32_e32 v6, s22
+; GFX6-NEXT:    v_mov_b32_e32 v7, s21
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s24
-; GFX6-NEXT:    v_mov_b32_e32 v1, s23
-; GFX6-NEXT:    v_mov_b32_e32 v2, s22
-; GFX6-NEXT:    v_mov_b32_e32 v3, s21
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    v_mov_b32_e32 v4, s20
+; GFX6-NEXT:    v_mov_b32_e32 v5, s19
+; GFX6-NEXT:    v_mov_b32_e32 v6, s18
+; GFX6-NEXT:    v_mov_b32_e32 v7, s17
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s20
-; GFX6-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NEXT:    v_mov_b32_e32 v2, s18
-; GFX6-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    v_mov_b32_e32 v4, s16
+; GFX6-NEXT:    v_mov_b32_e32 v5, s15
+; GFX6-NEXT:    v_mov_b32_e32 v6, s14
+; GFX6-NEXT:    v_mov_b32_e32 v7, s13
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s16
-; GFX6-NEXT:    v_mov_b32_e32 v1, s15
-; GFX6-NEXT:    v_mov_b32_e32 v2, s14
-; GFX6-NEXT:    v_mov_b32_e32 v3, s13
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    v_mov_b32_e32 v4, s12
+; GFX6-NEXT:    v_mov_b32_e32 v5, s11
+; GFX6-NEXT:    v_mov_b32_e32 v6, s10
+; GFX6-NEXT:    v_mov_b32_e32 v7, s9
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s12
-; GFX6-NEXT:    v_mov_b32_e32 v1, s11
-; GFX6-NEXT:    v_mov_b32_e32 v2, s10
-; GFX6-NEXT:    v_mov_b32_e32 v3, s9
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v4, s8
+; GFX6-NEXT:    v_mov_b32_e32 v5, s7
+; GFX6-NEXT:    v_mov_b32_e32 v6, s6
+; GFX6-NEXT:    v_mov_b32_e32 v7, s5
+; GFX6-NEXT:    v_mov_b32_e32 v8, s4
+; GFX6-NEXT:    v_mov_b32_e32 v9, s36
+; GFX6-NEXT:    v_mov_b32_e32 v10, s35
+; GFX6-NEXT:    v_mov_b32_e32 v11, s34
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s8
-; GFX6-NEXT:    v_mov_b32_e32 v1, s7
-; GFX6-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NEXT:    v_mov_b32_e32 v3, s5
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NEXT:    v_mov_b32_e32 v8, s33
+; GFX6-NEXT:    v_mov_b32_e32 v9, s31
+; GFX6-NEXT:    v_mov_b32_e32 v10, s30
+; GFX6-NEXT:    v_mov_b32_e32 v11, s29
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    v_mov_b32_e32 v0, s28
+; GFX6-NEXT:    v_mov_b32_e32 v1, s27
+; GFX6-NEXT:    v_mov_b32_e32 v2, s26
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_sextload_v32i1_to_v32i32:
@@ -2317,109 +2313,110 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_u32 s57, s3, 0x1000a
 ; GFX6-NEXT:    s_bfe_u32 s58, s3, 0x10008
 ; GFX6-NEXT:    s_bfe_u32 s59, s3, 0x1000e
-; GFX6-NEXT:    s_bfe_u32 s60, s3, 0x10012
-; GFX6-NEXT:    s_bfe_u32 s61, s3, 0x10010
-; GFX6-NEXT:    s_bfe_u32 s62, s3, 0x10016
-; GFX6-NEXT:    s_bfe_u32 s63, s3, 0x1001a
-; GFX6-NEXT:    s_bfe_u32 s64, s3, 0x10018
-; GFX6-NEXT:    s_bfe_u32 s65, s3, 0x1001e
-; GFX6-NEXT:    s_bfe_u32 s66, s3, 0x1001c
-; GFX6-NEXT:    s_bfe_u32 s67, s3, 0x10014
-; GFX6-NEXT:    s_bfe_u32 s68, s3, 0x1000c
+; GFX6-NEXT:    s_bfe_u32 s60, s3, 0x1000c
+; GFX6-NEXT:    s_bfe_u32 s61, s3, 0x10012
+; GFX6-NEXT:    s_bfe_u32 s62, s3, 0x10010
+; GFX6-NEXT:    s_bfe_u32 s63, s3, 0x10016
+; GFX6-NEXT:    s_bfe_u32 s64, s3, 0x10014
+; GFX6-NEXT:    s_bfe_u32 s65, s3, 0x1001a
+; GFX6-NEXT:    s_bfe_u32 s66, s3, 0x10018
+; GFX6-NEXT:    s_bfe_u32 s67, s3, 0x1001e
+; GFX6-NEXT:    s_bfe_u32 s68, s3, 0x1001c
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    v_mov_b32_e32 v0, s66
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s36
-; GFX6-NEXT:    v_mov_b32_e32 v2, s65
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s35
-; GFX6-NEXT:    v_mov_b32_e32 v4, s64
-; GFX6-NEXT:    v_mov_b32_e32 v5, s34
-; GFX6-NEXT:    v_mov_b32_e32 v6, s63
-; GFX6-NEXT:    v_mov_b32_e32 v7, s33
-; GFX6-NEXT:    v_mov_b32_e32 v8, s67
-; GFX6-NEXT:    v_mov_b32_e32 v9, s31
-; GFX6-NEXT:    v_mov_b32_e32 v10, s62
-; GFX6-NEXT:    v_mov_b32_e32 v11, s30
-; GFX6-NEXT:    v_mov_b32_e32 v12, s61
-; GFX6-NEXT:    v_mov_b32_e32 v13, s29
-; GFX6-NEXT:    v_mov_b32_e32 v14, s60
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GFX6-NEXT:    v_mov_b32_e32 v15, s28
-; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GFX6-NEXT:    s_waitcnt expcnt(3)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s68
-; GFX6-NEXT:    v_mov_b32_e32 v1, s27
-; GFX6-NEXT:    v_mov_b32_e32 v2, s59
-; GFX6-NEXT:    v_mov_b32_e32 v3, s26
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GFX6-NEXT:    v_mov_b32_e32 v4, s34
+; GFX6-NEXT:    v_mov_b32_e32 v6, s33
+; GFX6-NEXT:    v_mov_b32_e32 v7, s31
+; GFX6-NEXT:    v_mov_b32_e32 v9, s30
+; GFX6-NEXT:    v_mov_b32_e32 v10, s29
+; GFX6-NEXT:    v_mov_b32_e32 v12, s28
+; GFX6-NEXT:    v_mov_b32_e32 v13, s25
+; GFX6-NEXT:    v_mov_b32_e32 v16, s56
+; GFX6-NEXT:    v_mov_b32_e32 v15, s24
+; GFX6-NEXT:    v_mov_b32_e32 v17, s23
+; GFX6-NEXT:    v_mov_b32_e32 v18, s55
+; GFX6-NEXT:    v_mov_b32_e32 v19, s22
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s58
-; GFX6-NEXT:    v_mov_b32_e32 v1, s25
-; GFX6-NEXT:    v_mov_b32_e32 v2, s57
-; GFX6-NEXT:    v_mov_b32_e32 v3, s24
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
+; GFX6-NEXT:    v_mov_b32_e32 v16, s53
+; GFX6-NEXT:    v_mov_b32_e32 v17, s21
+; GFX6-NEXT:    v_mov_b32_e32 v18, s54
+; GFX6-NEXT:    v_mov_b32_e32 v19, s20
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:128
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s56
-; GFX6-NEXT:    v_mov_b32_e32 v1, s23
-; GFX6-NEXT:    v_mov_b32_e32 v2, s55
-; GFX6-NEXT:    v_mov_b32_e32 v3, s22
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GFX6-NEXT:    v_mov_b32_e32 v16, s52
+; GFX6-NEXT:    v_mov_b32_e32 v17, s19
+; GFX6-NEXT:    v_mov_b32_e32 v18, s51
+; GFX6-NEXT:    v_mov_b32_e32 v19, s18
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s53
-; GFX6-NEXT:    v_mov_b32_e32 v1, s21
-; GFX6-NEXT:    v_mov_b32_e32 v2, s54
-; GFX6-NEXT:    v_mov_b32_e32 v3, s20
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GFX6-NEXT:    v_mov_b32_e32 v16, s50
+; GFX6-NEXT:    v_mov_b32_e32 v17, s17
+; GFX6-NEXT:    v_mov_b32_e32 v18, s49
+; GFX6-NEXT:    v_mov_b32_e32 v19, s16
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s52
-; GFX6-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NEXT:    v_mov_b32_e32 v2, s51
-; GFX6-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    v_mov_b32_e32 v16, s48
+; GFX6-NEXT:    v_mov_b32_e32 v17, s15
+; GFX6-NEXT:    v_mov_b32_e32 v18, s47
+; GFX6-NEXT:    v_mov_b32_e32 v19, s14
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s50
-; GFX6-NEXT:    v_mov_b32_e32 v1, s17
-; GFX6-NEXT:    v_mov_b32_e32 v2, s49
-; GFX6-NEXT:    v_mov_b32_e32 v3, s16
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    v_mov_b32_e32 v16, s46
+; GFX6-NEXT:    v_mov_b32_e32 v17, s13
+; GFX6-NEXT:    v_mov_b32_e32 v18, s45
+; GFX6-NEXT:    v_mov_b32_e32 v19, s12
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s48
-; GFX6-NEXT:    v_mov_b32_e32 v1, s15
-; GFX6-NEXT:    v_mov_b32_e32 v2, s47
-; GFX6-NEXT:    v_mov_b32_e32 v3, s14
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    v_mov_b32_e32 v16, s44
+; GFX6-NEXT:    v_mov_b32_e32 v17, s11
+; GFX6-NEXT:    v_mov_b32_e32 v18, s43
+; GFX6-NEXT:    v_mov_b32_e32 v19, s10
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s46
-; GFX6-NEXT:    v_mov_b32_e32 v1, s13
-; GFX6-NEXT:    v_mov_b32_e32 v2, s45
-; GFX6-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    v_mov_b32_e32 v16, s42
+; GFX6-NEXT:    v_mov_b32_e32 v17, s9
+; GFX6-NEXT:    v_mov_b32_e32 v18, s41
+; GFX6-NEXT:    v_mov_b32_e32 v19, s8
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s44
-; GFX6-NEXT:    v_mov_b32_e32 v1, s11
-; GFX6-NEXT:    v_mov_b32_e32 v2, s43
-; GFX6-NEXT:    v_mov_b32_e32 v3, s10
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    v_mov_b32_e32 v16, s40
+; GFX6-NEXT:    v_mov_b32_e32 v17, s7
+; GFX6-NEXT:    v_mov_b32_e32 v18, s39
+; GFX6-NEXT:    v_mov_b32_e32 v19, s6
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s42
-; GFX6-NEXT:    v_mov_b32_e32 v1, s9
-; GFX6-NEXT:    v_mov_b32_e32 v2, s41
-; GFX6-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    v_mov_b32_e32 v17, s27
+; GFX6-NEXT:    v_mov_b32_e32 v19, s26
+; GFX6-NEXT:    v_mov_b32_e32 v0, s68
+; GFX6-NEXT:    v_mov_b32_e32 v2, s67
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
+; GFX6-NEXT:    v_mov_b32_e32 v20, s37
+; GFX6-NEXT:    v_mov_b32_e32 v21, s5
+; GFX6-NEXT:    v_mov_b32_e32 v22, s38
+; GFX6-NEXT:    v_mov_b32_e32 v23, s4
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s40
-; GFX6-NEXT:    v_mov_b32_e32 v1, s7
-; GFX6-NEXT:    v_mov_b32_e32 v2, s39
-; GFX6-NEXT:    v_mov_b32_e32 v3, s6
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v3, s66
+; GFX6-NEXT:    v_mov_b32_e32 v5, s65
+; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:224
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s37
-; GFX6-NEXT:    v_mov_b32_e32 v1, s5
-; GFX6-NEXT:    v_mov_b32_e32 v2, s38
-; GFX6-NEXT:    v_mov_b32_e32 v3, s4
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NEXT:    v_mov_b32_e32 v6, s64
+; GFX6-NEXT:    v_mov_b32_e32 v8, s63
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:208
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v9, s62
+; GFX6-NEXT:    v_mov_b32_e32 v11, s61
+; GFX6-NEXT:    v_mov_b32_e32 v16, s60
+; GFX6-NEXT:    v_mov_b32_e32 v18, s59
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
+; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:192
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v12, s58
+; GFX6-NEXT:    v_mov_b32_e32 v14, s57
+; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:160
+; GFX6-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_zextload_v64i1_to_v64i32:
@@ -2861,143 +2858,114 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_i32 s9, s2, 0x10006
 ; GFX6-NEXT:    s_bfe_i32 s10, s2, 0x10005
 ; GFX6-NEXT:    s_bfe_i32 s11, s2, 0x10004
-; GFX6-NEXT:    s_bfe_i32 s12, s2, 0x1000b
-; GFX6-NEXT:    s_bfe_i32 s13, s2, 0x1000a
-; GFX6-NEXT:    s_bfe_i32 s14, s2, 0x10009
-; GFX6-NEXT:    s_bfe_i32 s15, s2, 0x10008
-; GFX6-NEXT:    s_bfe_i32 s16, s2, 0x1000f
-; GFX6-NEXT:    s_bfe_i32 s17, s2, 0x1000e
-; GFX6-NEXT:    s_bfe_i32 s18, s2, 0x1000d
-; GFX6-NEXT:    s_bfe_i32 s19, s2, 0x1000c
-; GFX6-NEXT:    s_bfe_i32 s20, s2, 0x10013
-; GFX6-NEXT:    s_bfe_i32 s21, s2, 0x10012
-; GFX6-NEXT:    s_bfe_i32 s22, s2, 0x10011
-; GFX6-NEXT:    s_bfe_i32 s23, s2, 0x10010
-; GFX6-NEXT:    s_bfe_i32 s24, s2, 0x10017
-; GFX6-NEXT:    s_bfe_i32 s25, s2, 0x10016
-; GFX6-NEXT:    s_bfe_i32 s26, s2, 0x10015
-; GFX6-NEXT:    s_bfe_i32 s27, s2, 0x10014
-; GFX6-NEXT:    s_bfe_i32 s28, s2, 0x1001b
-; GFX6-NEXT:    s_bfe_i32 s29, s2, 0x1001a
-; GFX6-NEXT:    s_bfe_i32 s30, s2, 0x10019
-; GFX6-NEXT:    s_bfe_i32 s31, s2, 0x10018
-; GFX6-NEXT:    s_ashr_i32 s33, s2, 31
-; GFX6-NEXT:    s_bfe_i32 s34, s2, 0x1001e
-; GFX6-NEXT:    s_bfe_i32 s35, s2, 0x1001d
-; GFX6-NEXT:    s_bfe_i32 s36, s2, 0x1001c
-; GFX6-NEXT:    s_bfe_i32 s37, s3, 0x10003
-; GFX6-NEXT:    s_bfe_i32 s38, s3, 0x10002
-; GFX6-NEXT:    s_bfe_i32 s39, s3, 0x10001
-; GFX6-NEXT:    s_bfe_i32 s40, s3, 0x10000
-; GFX6-NEXT:    s_bfe_i32 s41, s3, 0x10007
-; GFX6-NEXT:    s_bfe_i32 s42, s3, 0x10006
-; GFX6-NEXT:    s_bfe_i32 s43, s3, 0x10005
-; GFX6-NEXT:    s_bfe_i32 s44, s3, 0x10004
-; GFX6-NEXT:    s_bfe_i32 s45, s3, 0x1000b
-; GFX6-NEXT:    s_bfe_i32 s46, s3, 0x1000a
-; GFX6-NEXT:    s_bfe_i32 s47, s3, 0x10009
-; GFX6-NEXT:    s_bfe_i32 s48, s3, 0x10008
-; GFX6-NEXT:    s_bfe_i32 s49, s3, 0x1000f
-; GFX6-NEXT:    s_bfe_i32 s50, s3, 0x1000e
-; GFX6-NEXT:    s_bfe_i32 s51, s3, 0x1000d
+; GFX6-NEXT:    s_bfe_i32 s15, s2, 0x1000b
+; GFX6-NEXT:    s_bfe_i32 s16, s2, 0x1000a
+; GFX6-NEXT:    s_bfe_i32 s17, s2, 0x10009
+; GFX6-NEXT:    s_bfe_i32 s18, s2, 0x10008
+; GFX6-NEXT:    s_bfe_i32 s19, s2, 0x1000f
+; GFX6-NEXT:    s_bfe_i32 s20, s2, 0x1000e
+; GFX6-NEXT:    s_bfe_i32 s21, s2, 0x1000d
+; GFX6-NEXT:    s_bfe_i32 s22, s2, 0x1000c
+; GFX6-NEXT:    s_bfe_i32 s23, s2, 0x10013
+; GFX6-NEXT:    s_bfe_i32 s24, s2, 0x10012
+; GFX6-NEXT:    s_bfe_i32 s25, s2, 0x10011
+; GFX6-NEXT:    s_bfe_i32 s26, s2, 0x10010
+; GFX6-NEXT:    s_bfe_i32 s27, s2, 0x10017
+; GFX6-NEXT:    s_bfe_i32 s28, s2, 0x10016
+; GFX6-NEXT:    s_bfe_i32 s29, s2, 0x10015
+; GFX6-NEXT:    s_bfe_i32 s30, s2, 0x10014
+; GFX6-NEXT:    s_bfe_i32 s31, s2, 0x1001b
+; GFX6-NEXT:    s_bfe_i32 s33, s2, 0x1001a
+; GFX6-NEXT:    s_bfe_i32 s34, s2, 0x10019
+; GFX6-NEXT:    s_bfe_i32 s35, s2, 0x10018
+; GFX6-NEXT:    s_ashr_i32 s36, s2, 31
+; GFX6-NEXT:    s_bfe_i32 s37, s2, 0x1001e
+; GFX6-NEXT:    s_bfe_i32 s38, s2, 0x1001d
+; GFX6-NEXT:    s_bfe_i32 s39, s2, 0x1001c
+; GFX6-NEXT:    s_bfe_i32 s40, s3, 0x10003
+; GFX6-NEXT:    s_bfe_i32 s41, s3, 0x10002
+; GFX6-NEXT:    s_bfe_i32 s42, s3, 0x10001
+; GFX6-NEXT:    s_bfe_i32 s43, s3, 0x10000
+; GFX6-NEXT:    s_bfe_i32 s44, s3, 0x10007
+; GFX6-NEXT:    s_bfe_i32 s45, s3, 0x10006
+; GFX6-NEXT:    s_bfe_i32 s46, s3, 0x10005
+; GFX6-NEXT:    s_bfe_i32 s47, s3, 0x10004
+; GFX6-NEXT:    s_bfe_i32 s48, s3, 0x1000b
+; GFX6-NEXT:    s_bfe_i32 s49, s3, 0x1000a
+; GFX6-NEXT:    s_bfe_i32 s50, s3, 0x10009
+; GFX6-NEXT:    s_bfe_i32 s51, s3, 0x10008
+; GFX6-NEXT:    s_bfe_i32 s12, s3, 0x1000f
+; GFX6-NEXT:    s_bfe_i32 s13, s3, 0x1000e
+; GFX6-NEXT:    s_bfe_i32 s14, s3, 0x1000d
 ; GFX6-NEXT:    s_bfe_i32 s52, s3, 0x1000c
-; GFX6-NEXT:    s_bfe_i32 s53, s3, 0x10012
-; GFX6-NEXT:    s_bfe_i32 s54, s3, 0x10011
-; GFX6-NEXT:    s_bfe_i32 s55, s3, 0x10010
-; GFX6-NEXT:    s_bfe_i32 s56, s3, 0x10017
-; GFX6-NEXT:    s_bfe_i32 s57, s3, 0x10016
-; GFX6-NEXT:    s_bfe_i32 s58, s3, 0x10015
-; GFX6-NEXT:    s_bfe_i32 s59, s3, 0x1001b
-; GFX6-NEXT:    s_bfe_i32 s60, s3, 0x1001a
-; GFX6-NEXT:    s_bfe_i32 s61, s3, 0x10019
-; GFX6-NEXT:    s_bfe_i32 s62, s3, 0x10018
-; GFX6-NEXT:    s_ashr_i32 s63, s3, 31
-; GFX6-NEXT:    s_bfe_i32 s64, s3, 0x1001e
-; GFX6-NEXT:    s_bfe_i32 s65, s3, 0x1001d
-; GFX6-NEXT:    s_bfe_i32 s66, s3, 0x1001c
-; GFX6-NEXT:    s_bfe_i32 s67, s3, 0x10014
-; GFX6-NEXT:    s_bfe_i32 s68, s3, 0x10013
+; GFX6-NEXT:    s_bfe_i32 s53, s3, 0x10013
+; GFX6-NEXT:    s_bfe_i32 s54, s3, 0x10012
+; GFX6-NEXT:    s_bfe_i32 s55, s3, 0x10011
+; GFX6-NEXT:    s_bfe_i32 s56, s3, 0x10010
+; GFX6-NEXT:    s_bfe_i32 s57, s3, 0x10017
+; GFX6-NEXT:    s_bfe_i32 s58, s3, 0x10016
+; GFX6-NEXT:    s_bfe_i32 s59, s3, 0x10015
+; GFX6-NEXT:    s_bfe_i32 s60, s3, 0x10014
+; GFX6-NEXT:    s_bfe_i32 s61, s3, 0x1001b
+; GFX6-NEXT:    s_bfe_i32 s62, s3, 0x1001a
+; GFX6-NEXT:    s_bfe_i32 s63, s3, 0x10019
+; GFX6-NEXT:    s_bfe_i32 s64, s3, 0x10018
+; GFX6-NEXT:    s_ashr_i32 s65, s3, 31
+; GFX6-NEXT:    s_bfe_i32 s66, s3, 0x1001e
+; GFX6-NEXT:    s_bfe_i32 s67, s3, 0x1001d
+; GFX6-NEXT:    s_bfe_i32 s68, s3, 0x1001c
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    v_mov_b32_e32 v0, s66
-; GFX6-NEXT:    v_mov_b32_e32 v1, s65
-; GFX6-NEXT:    v_mov_b32_e32 v2, s64
-; GFX6-NEXT:    v_mov_b32_e32 v3, s63
-; GFX6-NEXT:    v_mov_b32_e32 v4, s62
-; GFX6-NEXT:    v_mov_b32_e32 v5, s61
-; GFX6-NEXT:    v_mov_b32_e32 v6, s60
-; GFX6-NEXT:    v_mov_b32_e32 v7, s59
-; GFX6-NEXT:    v_mov_b32_e32 v8, s67
-; GFX6-NEXT:    v_mov_b32_e32 v9, s58
-; GFX6-NEXT:    v_mov_b32_e32 v10, s57
-; GFX6-NEXT:    v_mov_b32_e32 v11, s56
-; GFX6-NEXT:    v_mov_b32_e32 v12, s55
-; GFX6-NEXT:    v_mov_b32_e32 v13, s54
-; GFX6-NEXT:    v_mov_b32_e32 v14, s53
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GFX6-NEXT:    v_mov_b32_e32 v15, s68
-; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GFX6-NEXT:    s_waitcnt expcnt(3)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s52
-; GFX6-NEXT:    v_mov_b32_e32 v1, s51
-; GFX6-NEXT:    v_mov_b32_e32 v2, s50
-; GFX6-NEXT:    v_mov_b32_e32 v3, s49
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s48
-; GFX6-NEXT:    v_mov_b32_e32 v1, s47
-; GFX6-NEXT:    v_mov_b32_e32 v2, s46
-; GFX6-NEXT:    v_mov_b32_e32 v3, s45
+; GFX6-NEXT:    v_mov_b32_e32 v0, s51
+; GFX6-NEXT:    v_mov_b32_e32 v1, s50
+; GFX6-NEXT:    v_mov_b32_e32 v2, s49
+; GFX6-NEXT:    v_mov_b32_e32 v3, s48
+; GFX6-NEXT:    v_mov_b32_e32 v4, s47
+; GFX6-NEXT:    v_mov_b32_e32 v5, s46
+; GFX6-NEXT:    v_mov_b32_e32 v6, s45
+; GFX6-NEXT:    v_mov_b32_e32 v7, s44
+; GFX6-NEXT:    v_mov_b32_e32 v8, s43
+; GFX6-NEXT:    v_mov_b32_e32 v9, s42
+; GFX6-NEXT:    v_mov_b32_e32 v10, s41
+; GFX6-NEXT:    v_mov_b32_e32 v11, s40
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s44
-; GFX6-NEXT:    v_mov_b32_e32 v1, s43
-; GFX6-NEXT:    v_mov_b32_e32 v2, s42
-; GFX6-NEXT:    v_mov_b32_e32 v3, s41
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s40
-; GFX6-NEXT:    v_mov_b32_e32 v1, s39
-; GFX6-NEXT:    v_mov_b32_e32 v2, s38
-; GFX6-NEXT:    v_mov_b32_e32 v3, s37
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s36
-; GFX6-NEXT:    v_mov_b32_e32 v1, s35
-; GFX6-NEXT:    v_mov_b32_e32 v2, s34
-; GFX6-NEXT:    v_mov_b32_e32 v3, s33
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:144
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NEXT:    v_mov_b32_e32 v0, s39
+; GFX6-NEXT:    v_mov_b32_e32 v1, s38
+; GFX6-NEXT:    v_mov_b32_e32 v2, s37
+; GFX6-NEXT:    v_mov_b32_e32 v3, s36
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s31
-; GFX6-NEXT:    v_mov_b32_e32 v1, s30
-; GFX6-NEXT:    v_mov_b32_e32 v2, s29
-; GFX6-NEXT:    v_mov_b32_e32 v3, s28
+; GFX6-NEXT:    v_mov_b32_e32 v0, s35
+; GFX6-NEXT:    v_mov_b32_e32 v1, s34
+; GFX6-NEXT:    v_mov_b32_e32 v2, s33
+; GFX6-NEXT:    v_mov_b32_e32 v3, s31
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s27
-; GFX6-NEXT:    v_mov_b32_e32 v1, s26
-; GFX6-NEXT:    v_mov_b32_e32 v2, s25
-; GFX6-NEXT:    v_mov_b32_e32 v3, s24
+; GFX6-NEXT:    v_mov_b32_e32 v0, s30
+; GFX6-NEXT:    v_mov_b32_e32 v1, s29
+; GFX6-NEXT:    v_mov_b32_e32 v2, s28
+; GFX6-NEXT:    v_mov_b32_e32 v3, s27
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s23
-; GFX6-NEXT:    v_mov_b32_e32 v1, s22
-; GFX6-NEXT:    v_mov_b32_e32 v2, s21
-; GFX6-NEXT:    v_mov_b32_e32 v3, s20
+; GFX6-NEXT:    v_mov_b32_e32 v0, s26
+; GFX6-NEXT:    v_mov_b32_e32 v1, s25
+; GFX6-NEXT:    v_mov_b32_e32 v2, s24
+; GFX6-NEXT:    v_mov_b32_e32 v3, s23
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s19
-; GFX6-NEXT:    v_mov_b32_e32 v1, s18
-; GFX6-NEXT:    v_mov_b32_e32 v2, s17
-; GFX6-NEXT:    v_mov_b32_e32 v3, s16
+; GFX6-NEXT:    v_mov_b32_e32 v0, s22
+; GFX6-NEXT:    v_mov_b32_e32 v1, s21
+; GFX6-NEXT:    v_mov_b32_e32 v2, s20
+; GFX6-NEXT:    v_mov_b32_e32 v3, s19
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s15
-; GFX6-NEXT:    v_mov_b32_e32 v1, s14
-; GFX6-NEXT:    v_mov_b32_e32 v2, s13
-; GFX6-NEXT:    v_mov_b32_e32 v3, s12
+; GFX6-NEXT:    v_mov_b32_e32 v0, s18
+; GFX6-NEXT:    v_mov_b32_e32 v1, s17
+; GFX6-NEXT:    v_mov_b32_e32 v2, s16
+; GFX6-NEXT:    v_mov_b32_e32 v3, s15
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s11
@@ -3010,6 +2978,33 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s4
+; GFX6-NEXT:    v_mov_b32_e32 v4, s68
+; GFX6-NEXT:    v_mov_b32_e32 v5, s67
+; GFX6-NEXT:    v_mov_b32_e32 v6, s66
+; GFX6-NEXT:    v_mov_b32_e32 v7, s65
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s64
+; GFX6-NEXT:    v_mov_b32_e32 v5, s63
+; GFX6-NEXT:    v_mov_b32_e32 v6, s62
+; GFX6-NEXT:    v_mov_b32_e32 v7, s61
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s60
+; GFX6-NEXT:    v_mov_b32_e32 v5, s59
+; GFX6-NEXT:    v_mov_b32_e32 v6, s58
+; GFX6-NEXT:    v_mov_b32_e32 v7, s57
+; GFX6-NEXT:    v_mov_b32_e32 v8, s56
+; GFX6-NEXT:    v_mov_b32_e32 v9, s55
+; GFX6-NEXT:    v_mov_b32_e32 v10, s54
+; GFX6-NEXT:    v_mov_b32_e32 v11, s53
+; GFX6-NEXT:    v_mov_b32_e32 v12, s52
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
+; GFX6-NEXT:    v_mov_b32_e32 v13, s14
+; GFX6-NEXT:    v_mov_b32_e32 v14, s13
+; GFX6-NEXT:    v_mov_b32_e32 v15, s12
+; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -4506,53 +4501,49 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s8, s6
 ; GFX6-NEXT:    s_mov_b32 s9, s7
-; GFX6-NEXT:    buffer_load_ushort v29, off, s[8:11], 0
+; GFX6-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX6-NEXT:    v_mov_b32_e32 v3, v1
-; GFX6-NEXT:    v_mov_b32_e32 v4, v1
-; GFX6-NEXT:    v_mov_b32_e32 v6, v1
-; GFX6-NEXT:    v_mov_b32_e32 v7, v1
-; GFX6-NEXT:    v_mov_b32_e32 v9, v1
-; GFX6-NEXT:    v_mov_b32_e32 v10, v1
-; GFX6-NEXT:    v_mov_b32_e32 v12, v1
-; GFX6-NEXT:    v_mov_b32_e32 v14, v1
-; GFX6-NEXT:    v_mov_b32_e32 v16, v1
-; GFX6-NEXT:    v_mov_b32_e32 v18, v1
-; GFX6-NEXT:    v_mov_b32_e32 v20, v1
-; GFX6-NEXT:    v_mov_b32_e32 v22, v1
-; GFX6-NEXT:    v_mov_b32_e32 v24, v1
-; GFX6-NEXT:    v_mov_b32_e32 v26, v1
-; GFX6-NEXT:    v_mov_b32_e32 v28, v1
+; GFX6-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX6-NEXT:    s_mov_b32 s0, s4
 ; GFX6-NEXT:    s_mov_b32 s1, s5
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v2, v29, 11, 1
-; GFX6-NEXT:    v_bfe_u32 v0, v29, 10, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
-; GFX6-NEXT:    v_bfe_u32 v5, v29, 9, 1
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v3, v29, 8, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 15, v29
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v6, v29, 14, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
-; GFX6-NEXT:    v_bfe_u32 v27, v29, 5, 1
-; GFX6-NEXT:    v_bfe_u32 v23, v29, 7, 1
-; GFX6-NEXT:    v_bfe_u32 v19, v29, 1, 1
-; GFX6-NEXT:    v_bfe_u32 v15, v29, 3, 1
-; GFX6-NEXT:    v_bfe_u32 v11, v29, 13, 1
-; GFX6-NEXT:    v_bfe_u32 v25, v29, 4, 1
-; GFX6-NEXT:    v_bfe_u32 v21, v29, 6, 1
-; GFX6-NEXT:    v_and_b32_e32 v17, 1, v29
-; GFX6-NEXT:    v_bfe_u32 v13, v29, 2, 1
+; GFX6-NEXT:    v_bfe_u32 v6, v0, 5, 1
+; GFX6-NEXT:    v_bfe_u32 v4, v0, 7, 1
+; GFX6-NEXT:    v_bfe_u32 v10, v0, 1, 1
+; GFX6-NEXT:    v_bfe_u32 v14, v0, 3, 1
+; GFX6-NEXT:    v_bfe_u32 v18, v0, 13, 1
+; GFX6-NEXT:    v_bfe_u32 v2, v0, 6, 1
+; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    v_lshrrev_b32_e32 v22, 15, v0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_bfe_u32 v9, v29, 12, 1
-; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
-; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
-; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0
-; GFX6-NEXT:    buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
-; GFX6-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    v_bfe_u32 v2, v0, 11, 1
+; GFX6-NEXT:    v_bfe_u32 v4, v0, 4, 1
+; GFX6-NEXT:    v_and_b32_e32 v8, 1, v0
+; GFX6-NEXT:    v_bfe_u32 v12, v0, 2, 1
+; GFX6-NEXT:    v_bfe_u32 v16, v0, 12, 1
+; GFX6-NEXT:    v_bfe_u32 v20, v0, 14, 1
+; GFX6-NEXT:    v_bfe_u32 v26, v0, 9, 1
+; GFX6-NEXT:    v_bfe_u32 v24, v0, 8, 1
+; GFX6-NEXT:    v_bfe_u32 v0, v0, 10, 1
+; GFX6-NEXT:    v_mov_b32_e32 v25, v1
+; GFX6-NEXT:    v_mov_b32_e32 v27, v1
+; GFX6-NEXT:    v_mov_b32_e32 v21, v1
+; GFX6-NEXT:    v_mov_b32_e32 v23, v1
+; GFX6-NEXT:    v_mov_b32_e32 v17, v1
+; GFX6-NEXT:    v_mov_b32_e32 v19, v1
+; GFX6-NEXT:    v_mov_b32_e32 v13, v1
+; GFX6-NEXT:    v_mov_b32_e32 v15, v1
+; GFX6-NEXT:    v_mov_b32_e32 v9, v1
+; GFX6-NEXT:    v_mov_b32_e32 v11, v1
+; GFX6-NEXT:    v_mov_b32_e32 v7, v1
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -4762,58 +4753,58 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 14, v5
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 15, v5
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 12, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 10, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v11, 11, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v12, 8, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v13, 9, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 6, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 7, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 4, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 5, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v14, 2, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 13, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 10, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 11, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 8, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 9, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v11, 6, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v13, 4, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 5, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v12, 2, v5
 ; GFX6-NEXT:    v_bfe_i32 v2, v1, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 3, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v14, 3, v5
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v5
-; GFX6-NEXT:    v_bfe_i32 v2, v15, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v0, v14, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v2, v10, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v0, v9, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v10, v10, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v8, v8, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v14, v13, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v10, v8, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v8, v7, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v14, v14, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v12, v12, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v18, v11, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v16, v9, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v18, v15, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v16, v13, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v22, v1, 0, 1
 ; GFX6-NEXT:    v_bfe_i32 v20, v5, 0, 1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 13, v5
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 7, v5
 ; GFX6-NEXT:    v_bfe_i32 v26, v1, 0, 1
-; GFX6-NEXT:    v_bfe_i32 v24, v7, 0, 1
+; GFX6-NEXT:    v_bfe_i32 v24, v11, 0, 1
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
-; GFX6-NEXT:    v_ashrrev_i32_e32 v23, 31, v22
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
+; GFX6-NEXT:    v_ashrrev_i32_e32 v23, 31, v22
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v17, 31, v16
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v27, 31, v26
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v25, 31, v24
-; GFX6-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96
-; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
-; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:64
-; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
-; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
+; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -5470,165 +5461,165 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_load_dword s8, s[2:3], 0x0
+; GFX6-NEXT:    s_load_dword s26, s[2:3], 0x0
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshr_b32 s52, s8, 30
-; GFX6-NEXT:    s_lshr_b32 s46, s8, 31
-; GFX6-NEXT:    s_lshr_b32 s48, s8, 28
-; GFX6-NEXT:    s_lshr_b32 s36, s8, 29
-; GFX6-NEXT:    s_lshr_b32 s38, s8, 26
-; GFX6-NEXT:    s_lshr_b32 s26, s8, 27
-; GFX6-NEXT:    s_lshr_b32 s28, s8, 24
-; GFX6-NEXT:    s_lshr_b32 s4, s8, 25
-; GFX6-NEXT:    s_lshr_b32 s6, s8, 22
-; GFX6-NEXT:    s_lshr_b32 s10, s8, 23
-; GFX6-NEXT:    s_lshr_b32 s12, s8, 20
-; GFX6-NEXT:    s_lshr_b32 s14, s8, 21
-; GFX6-NEXT:    s_lshr_b32 s16, s8, 18
-; GFX6-NEXT:    s_lshr_b32 s18, s8, 19
-; GFX6-NEXT:    s_lshr_b32 s20, s8, 16
-; GFX6-NEXT:    s_lshr_b32 s22, s8, 17
-; GFX6-NEXT:    s_lshr_b32 s24, s8, 14
-; GFX6-NEXT:    s_lshr_b32 s30, s8, 15
-; GFX6-NEXT:    s_lshr_b32 s34, s8, 12
-; GFX6-NEXT:    s_lshr_b32 s40, s8, 13
-; GFX6-NEXT:    s_lshr_b32 s42, s8, 10
-; GFX6-NEXT:    s_lshr_b32 s44, s8, 11
-; GFX6-NEXT:    s_bfe_i64 s[50:51], s[8:9], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v0, s50
-; GFX6-NEXT:    v_mov_b32_e32 v1, s51
-; GFX6-NEXT:    s_lshr_b32 s50, s8, 8
-; GFX6-NEXT:    v_mov_b32_e32 v2, s52
-; GFX6-NEXT:    v_mov_b32_e32 v3, s53
-; GFX6-NEXT:    s_lshr_b32 s52, s8, 9
-; GFX6-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v4, s46
-; GFX6-NEXT:    v_mov_b32_e32 v5, s47
-; GFX6-NEXT:    s_lshr_b32 s46, s8, 6
-; GFX6-NEXT:    v_mov_b32_e32 v6, s48
-; GFX6-NEXT:    v_mov_b32_e32 v7, s49
-; GFX6-NEXT:    s_lshr_b32 s48, s8, 7
-; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v8, s36
-; GFX6-NEXT:    v_mov_b32_e32 v9, s37
-; GFX6-NEXT:    s_lshr_b32 s36, s8, 4
-; GFX6-NEXT:    v_mov_b32_e32 v10, s38
-; GFX6-NEXT:    v_mov_b32_e32 v11, s39
-; GFX6-NEXT:    s_lshr_b32 s38, s8, 5
-; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v12, s26
-; GFX6-NEXT:    v_mov_b32_e32 v13, s27
-; GFX6-NEXT:    s_lshr_b32 s26, s8, 2
-; GFX6-NEXT:    v_mov_b32_e32 v14, s28
-; GFX6-NEXT:    v_mov_b32_e32 v15, s29
-; GFX6-NEXT:    s_lshr_b32 s28, s8, 3
-; GFX6-NEXT:    s_lshr_b32 s8, s8, 1
-; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX6-NEXT:    s_lshr_b32 s18, s26, 30
+; GFX6-NEXT:    s_lshr_b32 s20, s26, 31
+; GFX6-NEXT:    s_lshr_b32 s22, s26, 28
+; GFX6-NEXT:    s_lshr_b32 s24, s26, 29
+; GFX6-NEXT:    s_lshr_b32 s28, s26, 26
+; GFX6-NEXT:    s_lshr_b32 s30, s26, 27
+; GFX6-NEXT:    s_lshr_b32 s34, s26, 24
+; GFX6-NEXT:    s_lshr_b32 s38, s26, 25
+; GFX6-NEXT:    s_lshr_b32 s48, s26, 22
+; GFX6-NEXT:    s_lshr_b32 s54, s26, 23
+; GFX6-NEXT:    s_lshr_b32 s52, s26, 20
+; GFX6-NEXT:    s_lshr_b32 s36, s26, 21
+; GFX6-NEXT:    s_lshr_b32 s40, s26, 18
+; GFX6-NEXT:    s_lshr_b32 s42, s26, 19
+; GFX6-NEXT:    s_lshr_b32 s44, s26, 16
+; GFX6-NEXT:    s_lshr_b32 s46, s26, 17
+; GFX6-NEXT:    s_lshr_b32 s50, s26, 14
+; GFX6-NEXT:    s_lshr_b32 s56, s26, 15
+; GFX6-NEXT:    s_lshr_b32 s58, s26, 12
+; GFX6-NEXT:    s_lshr_b32 s60, s26, 13
+; GFX6-NEXT:    s_lshr_b32 s4, s26, 10
+; GFX6-NEXT:    s_lshr_b32 s6, s26, 11
+; GFX6-NEXT:    s_lshr_b32 s8, s26, 8
+; GFX6-NEXT:    s_lshr_b32 s10, s26, 9
+; GFX6-NEXT:    s_lshr_b32 s12, s26, 6
+; GFX6-NEXT:    s_lshr_b32 s14, s26, 7
+; GFX6-NEXT:    s_lshr_b32 s16, s26, 4
 ; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v2, s38
+; GFX6-NEXT:    v_mov_b32_e32 v3, s39
+; GFX6-NEXT:    s_lshr_b32 s38, s26, 5
 ; GFX6-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v4, s48
+; GFX6-NEXT:    v_mov_b32_e32 v5, s49
+; GFX6-NEXT:    s_lshr_b32 s48, s26, 2
+; GFX6-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v6, s54
+; GFX6-NEXT:    v_mov_b32_e32 v7, s55
+; GFX6-NEXT:    s_lshr_b32 s54, s26, 3
 ; GFX6-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v8, s52
+; GFX6-NEXT:    v_mov_b32_e32 v9, s53
+; GFX6-NEXT:    s_lshr_b32 s52, s26, 1
+; GFX6-NEXT:    s_bfe_i64 s[62:63], s[26:27], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[34:35], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:176
+; GFX6-NEXT:    v_mov_b32_e32 v10, s36
+; GFX6-NEXT:    v_mov_b32_e32 v11, s37
+; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
+; GFX6-NEXT:    s_waitcnt expcnt(1)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s40
+; GFX6-NEXT:    v_mov_b32_e32 v5, s41
+; GFX6-NEXT:    v_mov_b32_e32 v6, s42
+; GFX6-NEXT:    v_mov_b32_e32 v7, s43
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:144
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s44
+; GFX6-NEXT:    v_mov_b32_e32 v5, s45
+; GFX6-NEXT:    v_mov_b32_e32 v6, s46
+; GFX6-NEXT:    v_mov_b32_e32 v7, s47
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:128
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s50
+; GFX6-NEXT:    v_mov_b32_e32 v5, s51
+; GFX6-NEXT:    v_mov_b32_e32 v6, s56
+; GFX6-NEXT:    v_mov_b32_e32 v7, s57
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s58
+; GFX6-NEXT:    v_mov_b32_e32 v5, s59
+; GFX6-NEXT:    v_mov_b32_e32 v6, s60
+; GFX6-NEXT:    v_mov_b32_e32 v7, s61
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v4, s62
+; GFX6-NEXT:    v_mov_b32_e32 v5, s63
+; GFX6-NEXT:    s_bfe_i64 s[34:35], s[52:53], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[36:37], s[54:55], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[40:41], s[48:49], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208
-; GFX6-NEXT:    v_mov_b32_e32 v16, s4
-; GFX6-NEXT:    v_mov_b32_e32 v17, s5
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
-; GFX6-NEXT:    s_waitcnt expcnt(3)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NEXT:    v_mov_b32_e32 v3, s7
-; GFX6-NEXT:    v_mov_b32_e32 v4, s10
-; GFX6-NEXT:    v_mov_b32_e32 v5, s11
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s12
-; GFX6-NEXT:    v_mov_b32_e32 v3, s13
-; GFX6-NEXT:    v_mov_b32_e32 v4, s14
-; GFX6-NEXT:    v_mov_b32_e32 v5, s15
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:160
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s16
-; GFX6-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NEXT:    v_mov_b32_e32 v4, s18
-; GFX6-NEXT:    v_mov_b32_e32 v5, s19
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:144
+; GFX6-NEXT:    v_mov_b32_e32 v6, s18
+; GFX6-NEXT:    v_mov_b32_e32 v7, s19
+; GFX6-NEXT:    v_mov_b32_e32 v8, s20
+; GFX6-NEXT:    v_mov_b32_e32 v9, s21
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:240
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s20
-; GFX6-NEXT:    v_mov_b32_e32 v3, s21
-; GFX6-NEXT:    v_mov_b32_e32 v4, s22
-; GFX6-NEXT:    v_mov_b32_e32 v5, s23
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:128
+; GFX6-NEXT:    v_mov_b32_e32 v6, s22
+; GFX6-NEXT:    v_mov_b32_e32 v7, s23
+; GFX6-NEXT:    v_mov_b32_e32 v8, s24
+; GFX6-NEXT:    v_mov_b32_e32 v9, s25
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s24
-; GFX6-NEXT:    v_mov_b32_e32 v3, s25
-; GFX6-NEXT:    v_mov_b32_e32 v4, s30
-; GFX6-NEXT:    v_mov_b32_e32 v5, s31
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    v_mov_b32_e32 v6, s28
+; GFX6-NEXT:    v_mov_b32_e32 v7, s29
+; GFX6-NEXT:    v_mov_b32_e32 v8, s30
+; GFX6-NEXT:    v_mov_b32_e32 v9, s31
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:208
+; GFX6-NEXT:    v_mov_b32_e32 v0, s26
+; GFX6-NEXT:    v_mov_b32_e32 v1, s27
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s34
-; GFX6-NEXT:    v_mov_b32_e32 v3, s35
-; GFX6-NEXT:    v_mov_b32_e32 v4, s40
-; GFX6-NEXT:    v_mov_b32_e32 v5, s41
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_mov_b32_e32 v3, s7
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s42
-; GFX6-NEXT:    v_mov_b32_e32 v3, s43
-; GFX6-NEXT:    v_mov_b32_e32 v4, s44
-; GFX6-NEXT:    v_mov_b32_e32 v5, s45
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
+; GFX6-NEXT:    v_mov_b32_e32 v0, s8
+; GFX6-NEXT:    v_mov_b32_e32 v1, s9
+; GFX6-NEXT:    v_mov_b32_e32 v2, s10
+; GFX6-NEXT:    v_mov_b32_e32 v3, s11
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s50
-; GFX6-NEXT:    v_mov_b32_e32 v3, s51
-; GFX6-NEXT:    v_mov_b32_e32 v4, s52
-; GFX6-NEXT:    v_mov_b32_e32 v5, s53
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
+; GFX6-NEXT:    v_mov_b32_e32 v0, s12
+; GFX6-NEXT:    v_mov_b32_e32 v1, s13
+; GFX6-NEXT:    v_mov_b32_e32 v2, s14
+; GFX6-NEXT:    v_mov_b32_e32 v3, s15
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s46
-; GFX6-NEXT:    v_mov_b32_e32 v3, s47
-; GFX6-NEXT:    v_mov_b32_e32 v4, s48
-; GFX6-NEXT:    v_mov_b32_e32 v5, s49
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
+; GFX6-NEXT:    v_mov_b32_e32 v0, s16
+; GFX6-NEXT:    v_mov_b32_e32 v1, s17
+; GFX6-NEXT:    v_mov_b32_e32 v2, s38
+; GFX6-NEXT:    v_mov_b32_e32 v3, s39
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v0, s40
+; GFX6-NEXT:    v_mov_b32_e32 v1, s41
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s36
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s37
-; GFX6-NEXT:    v_mov_b32_e32 v4, s38
-; GFX6-NEXT:    v_mov_b32_e32 v5, s39
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s26
-; GFX6-NEXT:    v_mov_b32_e32 v3, s27
-; GFX6-NEXT:    v_mov_b32_e32 v4, s28
-; GFX6-NEXT:    v_mov_b32_e32 v5, s29
-; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v2, s8
-; GFX6-NEXT:    v_mov_b32_e32 v3, s9
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NEXT:    v_mov_b32_e32 v6, s34
+; GFX6-NEXT:    v_mov_b32_e32 v7, s35
+; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_sextload_v32i1_to_v32i64:
@@ -6038,26 +6029,26 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_u32 s27, s2, 0x10019
 ; GFX6-NEXT:    s_bfe_u32 s29, s2, 0x1001b
 ; GFX6-NEXT:    s_bfe_u32 s31, s2, 0x1001d
-; GFX6-NEXT:    s_lshr_b32 s34, s2, 31
-; GFX6-NEXT:    s_bfe_u32 s36, s3, 0x10003
-; GFX6-NEXT:    s_bfe_u32 s37, s3, 0x10005
-; GFX6-NEXT:    s_bfe_u32 s38, s3, 0x10007
-; GFX6-NEXT:    s_bfe_u32 s39, s3, 0x10009
-; GFX6-NEXT:    s_bfe_u32 s40, s3, 0x1000b
-; GFX6-NEXT:    s_bfe_u32 s41, s3, 0x1000d
-; GFX6-NEXT:    s_bfe_u32 s42, s3, 0x1000f
-; GFX6-NEXT:    s_bfe_u32 s43, s3, 0x10011
-; GFX6-NEXT:    s_bfe_u32 s44, s3, 0x10013
-; GFX6-NEXT:    s_bfe_u32 s45, s3, 0x10015
-; GFX6-NEXT:    s_bfe_u32 s46, s3, 0x10017
-; GFX6-NEXT:    s_bfe_u32 s47, s3, 0x10019
-; GFX6-NEXT:    s_bfe_u32 s48, s3, 0x1001b
-; GFX6-NEXT:    s_bfe_u32 s49, s3, 0x1001d
-; GFX6-NEXT:    s_lshr_b32 s50, s3, 31
-; GFX6-NEXT:    s_bfe_u32 s9, s3, 0x10001
-; GFX6-NEXT:    s_bfe_u32 s6, s2, 0x10001
-; GFX6-NEXT:    s_and_b32 s7, s2, 1
-; GFX6-NEXT:    s_and_b32 s10, s3, 1
+; GFX6-NEXT:    s_lshr_b32 s33, s2, 31
+; GFX6-NEXT:    s_bfe_u32 s34, s3, 0x10003
+; GFX6-NEXT:    s_bfe_u32 s35, s3, 0x10005
+; GFX6-NEXT:    s_bfe_u32 s36, s3, 0x10007
+; GFX6-NEXT:    s_bfe_u32 s37, s3, 0x10009
+; GFX6-NEXT:    s_bfe_u32 s38, s3, 0x1000b
+; GFX6-NEXT:    s_bfe_u32 s39, s3, 0x1000d
+; GFX6-NEXT:    s_bfe_u32 s40, s3, 0x1000f
+; GFX6-NEXT:    s_bfe_u32 s41, s3, 0x10011
+; GFX6-NEXT:    s_bfe_u32 s42, s3, 0x10013
+; GFX6-NEXT:    s_bfe_u32 s43, s3, 0x10015
+; GFX6-NEXT:    s_bfe_u32 s44, s3, 0x10017
+; GFX6-NEXT:    s_bfe_u32 s45, s3, 0x10019
+; GFX6-NEXT:    s_bfe_u32 s46, s3, 0x1001b
+; GFX6-NEXT:    s_bfe_u32 s47, s3, 0x1001d
+; GFX6-NEXT:    s_lshr_b32 s48, s3, 31
+; GFX6-NEXT:    s_bfe_u32 s10, s3, 0x10001
+; GFX6-NEXT:    s_bfe_u32 s7, s2, 0x10001
+; GFX6-NEXT:    s_and_b32 s6, s2, 1
+; GFX6-NEXT:    s_and_b32 s9, s3, 1
 ; GFX6-NEXT:    s_bfe_u32 s12, s2, 0x10002
 ; GFX6-NEXT:    s_bfe_u32 s14, s2, 0x10004
 ; GFX6-NEXT:    s_bfe_u32 s16, s2, 0x10006
@@ -6068,8 +6059,8 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_bfe_u32 s26, s2, 0x10010
 ; GFX6-NEXT:    s_bfe_u32 s28, s2, 0x10012
 ; GFX6-NEXT:    s_bfe_u32 s30, s2, 0x10014
-; GFX6-NEXT:    s_bfe_u32 s33, s2, 0x10016
-; GFX6-NEXT:    s_bfe_u32 s35, s2, 0x10018
+; GFX6-NEXT:    s_bfe_u32 s49, s2, 0x10016
+; GFX6-NEXT:    s_bfe_u32 s50, s2, 0x10018
 ; GFX6-NEXT:    s_bfe_u32 s51, s2, 0x1001a
 ; GFX6-NEXT:    s_bfe_u32 s52, s2, 0x1001c
 ; GFX6-NEXT:    s_bfe_u32 s53, s2, 0x1001e
@@ -6092,132 +6083,132 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_mov_b32_e32 v3, v1
+; GFX6-NEXT:    v_mov_b32_e32 v2, s48
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s67
-; GFX6-NEXT:    v_mov_b32_e32 v2, s50
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s47
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s68
-; GFX6-NEXT:    v_mov_b32_e32 v2, s49
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s46
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s66
-; GFX6-NEXT:    v_mov_b32_e32 v2, s48
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s45
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s65
-; GFX6-NEXT:    v_mov_b32_e32 v2, s47
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:448
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s44
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s64
-; GFX6-NEXT:    v_mov_b32_e32 v2, s46
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s43
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s63
-; GFX6-NEXT:    v_mov_b32_e32 v2, s45
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:416
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s42
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s62
-; GFX6-NEXT:    v_mov_b32_e32 v2, s44
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s41
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s61
-; GFX6-NEXT:    v_mov_b32_e32 v2, s43
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s40
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s60
-; GFX6-NEXT:    v_mov_b32_e32 v2, s42
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s39
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s59
-; GFX6-NEXT:    v_mov_b32_e32 v2, s41
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s38
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s58
-; GFX6-NEXT:    v_mov_b32_e32 v2, s40
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s37
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s57
-; GFX6-NEXT:    v_mov_b32_e32 v2, s39
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s36
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s56
-; GFX6-NEXT:    v_mov_b32_e32 v2, s38
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s35
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s55
-; GFX6-NEXT:    v_mov_b32_e32 v2, s37
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s34
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s54
-; GFX6-NEXT:    v_mov_b32_e32 v2, s36
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v2, s33
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s53
-; GFX6-NEXT:    v_mov_b32_e32 v2, s34
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s52
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s31
+; GFX6-NEXT:    v_mov_b32_e32 v0, s52
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s51
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s29
+; GFX6-NEXT:    v_mov_b32_e32 v0, s51
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s35
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s27
+; GFX6-NEXT:    v_mov_b32_e32 v0, s50
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s33
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s25
+; GFX6-NEXT:    v_mov_b32_e32 v0, s49
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s30
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s23
+; GFX6-NEXT:    v_mov_b32_e32 v0, s30
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s28
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s21
+; GFX6-NEXT:    v_mov_b32_e32 v0, s28
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s26
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s19
+; GFX6-NEXT:    v_mov_b32_e32 v0, s26
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s24
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s17
+; GFX6-NEXT:    v_mov_b32_e32 v0, s24
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s22
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s15
+; GFX6-NEXT:    v_mov_b32_e32 v0, s22
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s20
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s13
+; GFX6-NEXT:    v_mov_b32_e32 v0, s20
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s18
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s11
+; GFX6-NEXT:    v_mov_b32_e32 v0, s18
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s16
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6-NEXT:    v_mov_b32_e32 v0, s16
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s14
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s5
+; GFX6-NEXT:    v_mov_b32_e32 v0, s14
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s10
-; GFX6-NEXT:    v_mov_b32_e32 v2, s9
+; GFX6-NEXT:    v_mov_b32_e32 v2, s10
+; GFX6-NEXT:    v_mov_b32_e32 v0, s9
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s7
-; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_mov_b32_e32 v2, s7
+; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -6884,321 +6875,320 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshr_b32 s48, s5, 30
-; GFX6-NEXT:    s_lshr_b32 s46, s5, 28
-; GFX6-NEXT:    s_lshr_b32 s44, s5, 29
-; GFX6-NEXT:    s_lshr_b32 s40, s5, 26
-; GFX6-NEXT:    s_lshr_b32 s42, s5, 27
-; GFX6-NEXT:    s_lshr_b32 s36, s5, 24
-; GFX6-NEXT:    s_lshr_b32 s38, s5, 25
-; GFX6-NEXT:    s_lshr_b32 s30, s5, 22
-; GFX6-NEXT:    s_lshr_b32 s34, s5, 23
-; GFX6-NEXT:    s_lshr_b32 s26, s5, 20
-; GFX6-NEXT:    s_lshr_b32 s28, s5, 21
-; GFX6-NEXT:    s_lshr_b32 s22, s5, 18
-; GFX6-NEXT:    s_lshr_b32 s24, s5, 19
-; GFX6-NEXT:    s_lshr_b32 s18, s5, 16
-; GFX6-NEXT:    s_lshr_b32 s20, s5, 17
-; GFX6-NEXT:    s_lshr_b32 s14, s5, 14
-; GFX6-NEXT:    s_lshr_b32 s16, s5, 15
-; GFX6-NEXT:    s_lshr_b32 s10, s5, 12
-; GFX6-NEXT:    s_lshr_b32 s12, s5, 13
-; GFX6-NEXT:    s_lshr_b32 s6, s5, 10
-; GFX6-NEXT:    s_lshr_b32 s8, s5, 11
-; GFX6-NEXT:    s_mov_b32 s50, s5
-; GFX6-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[52:53], s[4:5], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v0, s50
-; GFX6-NEXT:    v_mov_b32_e32 v1, s51
-; GFX6-NEXT:    s_lshr_b32 s50, s5, 8
-; GFX6-NEXT:    v_mov_b32_e32 v4, s52
-; GFX6-NEXT:    v_mov_b32_e32 v5, s53
-; GFX6-NEXT:    s_lshr_b32 s52, s5, 9
-; GFX6-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[54:55], s[46:47], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v6, s48
-; GFX6-NEXT:    v_mov_b32_e32 v7, s49
-; GFX6-NEXT:    s_lshr_b32 s46, s5, 6
-; GFX6-NEXT:    v_mov_b32_e32 v10, s54
-; GFX6-NEXT:    v_mov_b32_e32 v11, s55
-; GFX6-NEXT:    s_lshr_b32 s48, s5, 7
-; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX6-NEXT:    s_ashr_i32 s7, s5, 31
-; GFX6-NEXT:    v_mov_b32_e32 v12, s44
-; GFX6-NEXT:    v_mov_b32_e32 v13, s45
-; GFX6-NEXT:    s_lshr_b32 s44, s5, 4
-; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[54:55], s[42:43], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v14, s40
-; GFX6-NEXT:    v_mov_b32_e32 v15, s41
-; GFX6-NEXT:    s_lshr_b32 s42, s5, 5
-; GFX6-NEXT:    v_mov_b32_e32 v16, s54
-; GFX6-NEXT:    v_mov_b32_e32 v17, s55
-; GFX6-NEXT:    s_lshr_b32 s40, s5, 2
-; GFX6-NEXT:    v_mov_b32_e32 v8, s7
+; GFX6-NEXT:    s_ashr_i32 s6, s5, 31
+; GFX6-NEXT:    s_mov_b32 s2, -1
+; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_mov_b32_e32 v3, s6
+; GFX6-NEXT:    s_lshr_b32 s6, s5, 30
+; GFX6-NEXT:    s_lshr_b32 s8, s5, 28
+; GFX6-NEXT:    s_lshr_b32 s10, s5, 29
+; GFX6-NEXT:    s_lshr_b32 s12, s5, 26
+; GFX6-NEXT:    s_lshr_b32 s14, s5, 27
+; GFX6-NEXT:    s_lshr_b32 s16, s5, 24
+; GFX6-NEXT:    s_lshr_b32 s22, s5, 25
+; GFX6-NEXT:    s_lshr_b32 s26, s5, 22
+; GFX6-NEXT:    s_lshr_b32 s28, s5, 23
+; GFX6-NEXT:    s_lshr_b32 s34, s5, 20
+; GFX6-NEXT:    s_lshr_b32 s46, s5, 21
+; GFX6-NEXT:    s_lshr_b32 s48, s5, 18
+; GFX6-NEXT:    s_lshr_b32 s62, s5, 19
+; GFX6-NEXT:    s_lshr_b32 s64, s5, 16
+; GFX6-NEXT:    s_lshr_b32 s66, s5, 17
+; GFX6-NEXT:    s_lshr_b32 s68, s5, 14
+; GFX6-NEXT:    s_lshr_b32 s36, s5, 15
+; GFX6-NEXT:    s_lshr_b32 s38, s5, 12
+; GFX6-NEXT:    s_lshr_b32 s40, s5, 13
+; GFX6-NEXT:    s_lshr_b32 s42, s5, 10
+; GFX6-NEXT:    s_lshr_b32 s30, s5, 11
+; GFX6-NEXT:    s_lshr_b32 s44, s5, 8
+; GFX6-NEXT:    s_lshr_b32 s58, s5, 9
+; GFX6-NEXT:    s_lshr_b32 s54, s5, 6
+; GFX6-NEXT:    s_lshr_b32 s24, s5, 7
+; GFX6-NEXT:    s_lshr_b32 s20, s5, 4
+; GFX6-NEXT:    s_lshr_b32 s18, s5, 5
+; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v4, s22
+; GFX6-NEXT:    v_mov_b32_e32 v5, s23
+; GFX6-NEXT:    s_lshr_b32 s56, s5, 2
+; GFX6-NEXT:    s_bfe_i64 s[22:23], s[26:27], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v6, s22
+; GFX6-NEXT:    v_mov_b32_e32 v7, s23
+; GFX6-NEXT:    s_lshr_b32 s52, s5, 3
+; GFX6-NEXT:    s_bfe_i64 s[22:23], s[28:29], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v8, s22
+; GFX6-NEXT:    v_mov_b32_e32 v9, s23
+; GFX6-NEXT:    s_lshr_b32 s22, s5, 1
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[34:35], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v10, s26
+; GFX6-NEXT:    v_mov_b32_e32 v11, s27
+; GFX6-NEXT:    s_mov_b32 s60, s5
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[46:47], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v12, s26
+; GFX6-NEXT:    v_mov_b32_e32 v13, s27
+; GFX6-NEXT:    s_lshr_b32 s50, s4, 30
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[48:49], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v14, s26
+; GFX6-NEXT:    v_mov_b32_e32 v15, s27
+; GFX6-NEXT:    s_lshr_b32 s46, s4, 31
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[62:63], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v16, s26
+; GFX6-NEXT:    v_mov_b32_e32 v17, s27
+; GFX6-NEXT:    s_lshr_b32 s48, s4, 28
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[64:65], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v18, s26
+; GFX6-NEXT:    v_mov_b32_e32 v19, s27
+; GFX6-NEXT:    s_lshr_b32 s26, s4, 29
+; GFX6-NEXT:    s_bfe_i64 s[28:29], s[66:67], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v20, s28
+; GFX6-NEXT:    v_mov_b32_e32 v21, s29
+; GFX6-NEXT:    s_lshr_b32 s28, s4, 26
+; GFX6-NEXT:    s_bfe_i64 s[34:35], s[68:69], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v22, s34
+; GFX6-NEXT:    v_mov_b32_e32 v23, s35
+; GFX6-NEXT:    s_lshr_b32 s34, s4, 27
 ; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v24, s36
+; GFX6-NEXT:    v_mov_b32_e32 v25, s37
+; GFX6-NEXT:    s_lshr_b32 s36, s4, 24
 ; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v9, s7
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:496
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:432
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v6, s36
-; GFX6-NEXT:    v_mov_b32_e32 v7, s37
-; GFX6-NEXT:    s_lshr_b32 s36, s5, 3
-; GFX6-NEXT:    v_mov_b32_e32 v8, s38
-; GFX6-NEXT:    v_mov_b32_e32 v9, s39
-; GFX6-NEXT:    s_lshr_b32 s38, s5, 1
+; GFX6-NEXT:    v_mov_b32_e32 v6, s38
+; GFX6-NEXT:    v_mov_b32_e32 v7, s39
+; GFX6-NEXT:    s_lshr_b32 s38, s4, 25
+; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v8, s40
+; GFX6-NEXT:    v_mov_b32_e32 v9, s41
+; GFX6-NEXT:    s_lshr_b32 s40, s4, 22
+; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:416
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v10, s42
+; GFX6-NEXT:    v_mov_b32_e32 v11, s43
+; GFX6-NEXT:    s_lshr_b32 s42, s4, 23
 ; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:480
+; GFX6-NEXT:    v_mov_b32_e32 v12, s30
+; GFX6-NEXT:    v_mov_b32_e32 v13, s31
+; GFX6-NEXT:    s_lshr_b32 s30, s4, 20
+; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:400
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v10, s30
-; GFX6-NEXT:    v_mov_b32_e32 v11, s31
-; GFX6-NEXT:    s_lshr_b32 s30, s4, 30
-; GFX6-NEXT:    v_mov_b32_e32 v12, s34
-; GFX6-NEXT:    v_mov_b32_e32 v13, s35
-; GFX6-NEXT:    s_lshr_b32 s34, s4, 31
-; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:464
+; GFX6-NEXT:    v_mov_b32_e32 v14, s44
+; GFX6-NEXT:    v_mov_b32_e32 v15, s45
+; GFX6-NEXT:    s_lshr_b32 s44, s4, 21
+; GFX6-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v16, s58
+; GFX6-NEXT:    v_mov_b32_e32 v17, s59
+; GFX6-NEXT:    s_lshr_b32 s58, s4, 18
+; GFX6-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:384
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v14, s26
-; GFX6-NEXT:    v_mov_b32_e32 v15, s27
-; GFX6-NEXT:    s_lshr_b32 s26, s4, 28
-; GFX6-NEXT:    v_mov_b32_e32 v16, s28
-; GFX6-NEXT:    v_mov_b32_e32 v17, s29
-; GFX6-NEXT:    s_lshr_b32 s28, s4, 29
+; GFX6-NEXT:    v_mov_b32_e32 v18, s54
+; GFX6-NEXT:    v_mov_b32_e32 v19, s55
+; GFX6-NEXT:    s_lshr_b32 s54, s4, 19
 ; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:448
+; GFX6-NEXT:    v_mov_b32_e32 v20, s24
+; GFX6-NEXT:    v_mov_b32_e32 v21, s25
+; GFX6-NEXT:    s_lshr_b32 s62, s4, 16
+; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:368
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v6, s22
-; GFX6-NEXT:    v_mov_b32_e32 v7, s23
-; GFX6-NEXT:    s_lshr_b32 s22, s4, 26
-; GFX6-NEXT:    v_mov_b32_e32 v8, s24
-; GFX6-NEXT:    v_mov_b32_e32 v9, s25
-; GFX6-NEXT:    s_lshr_b32 s24, s4, 27
-; GFX6-NEXT:    s_bfe_i64 s[54:55], s[20:21], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v22, s20
+; GFX6-NEXT:    v_mov_b32_e32 v23, s21
+; GFX6-NEXT:    s_lshr_b32 s64, s4, 17
 ; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:432
+; GFX6-NEXT:    v_mov_b32_e32 v24, s18
+; GFX6-NEXT:    v_mov_b32_e32 v25, s19
+; GFX6-NEXT:    s_lshr_b32 s66, s4, 14
+; GFX6-NEXT:    s_bfe_i64 s[18:19], s[56:57], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:352
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v6, s18
+; GFX6-NEXT:    v_mov_b32_e32 v7, s19
+; GFX6-NEXT:    s_lshr_b32 s56, s4, 15
+; GFX6-NEXT:    s_bfe_i64 s[18:19], s[52:53], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v8, s18
+; GFX6-NEXT:    v_mov_b32_e32 v9, s19
+; GFX6-NEXT:    s_lshr_b32 s52, s4, 12
+; GFX6-NEXT:    s_bfe_i64 s[18:19], s[60:61], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:336
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v10, s18
 ; GFX6-NEXT:    v_mov_b32_e32 v11, s19
-; GFX6-NEXT:    s_lshr_b32 s20, s4, 24
-; GFX6-NEXT:    v_mov_b32_e32 v12, s54
-; GFX6-NEXT:    v_mov_b32_e32 v13, s55
-; GFX6-NEXT:    s_lshr_b32 s18, s4, 25
-; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:416
+; GFX6-NEXT:    s_lshr_b32 s60, s4, 13
+; GFX6-NEXT:    s_bfe_i64 s[18:19], s[22:23], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v12, s18
+; GFX6-NEXT:    v_mov_b32_e32 v13, s19
+; GFX6-NEXT:    s_lshr_b32 s18, s4, 10
+; GFX6-NEXT:    s_bfe_i64 s[20:21], s[50:51], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:320
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v14, s14
-; GFX6-NEXT:    v_mov_b32_e32 v15, s15
-; GFX6-NEXT:    s_lshr_b32 s14, s4, 22
-; GFX6-NEXT:    v_mov_b32_e32 v16, s16
-; GFX6-NEXT:    v_mov_b32_e32 v17, s17
-; GFX6-NEXT:    s_lshr_b32 s16, s4, 23
+; GFX6-NEXT:    v_mov_b32_e32 v14, s20
+; GFX6-NEXT:    v_mov_b32_e32 v15, s21
+; GFX6-NEXT:    s_lshr_b32 s20, s4, 11
+; GFX6-NEXT:    s_bfe_i64 s[22:23], s[46:47], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v16, s22
+; GFX6-NEXT:    v_mov_b32_e32 v17, s23
+; GFX6-NEXT:    s_lshr_b32 s22, s4, 8
+; GFX6-NEXT:    s_bfe_i64 s[24:25], s[48:49], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:304
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v18, s24
+; GFX6-NEXT:    v_mov_b32_e32 v19, s25
+; GFX6-NEXT:    s_lshr_b32 s24, s4, 9
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v20, s26
+; GFX6-NEXT:    v_mov_b32_e32 v21, s27
+; GFX6-NEXT:    s_lshr_b32 s26, s4, 6
+; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:288
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v22, s28
+; GFX6-NEXT:    v_mov_b32_e32 v23, s29
+; GFX6-NEXT:    s_lshr_b32 s28, s4, 7
+; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v24, s34
+; GFX6-NEXT:    v_mov_b32_e32 v25, s35
+; GFX6-NEXT:    s_lshr_b32 s34, s4, 4
+; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:272
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v6, s36
+; GFX6-NEXT:    v_mov_b32_e32 v7, s37
+; GFX6-NEXT:    s_lshr_b32 s36, s4, 5
+; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v8, s38
+; GFX6-NEXT:    v_mov_b32_e32 v9, s39
+; GFX6-NEXT:    s_lshr_b32 s38, s4, 2
+; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:256
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v10, s40
+; GFX6-NEXT:    v_mov_b32_e32 v11, s41
+; GFX6-NEXT:    s_lshr_b32 s40, s4, 3
+; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v12, s42
+; GFX6-NEXT:    v_mov_b32_e32 v13, s43
+; GFX6-NEXT:    s_lshr_b32 s42, s4, 1
+; GFX6-NEXT:    s_bfe_i64 s[46:47], s[4:5], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[48:49], s[60:61], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[50:51], s[52:53], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[52:53], s[56:57], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[56:57], s[66:67], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[60:61], s[64:65], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[4:5], s[16:17], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:400
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v6, s10
-; GFX6-NEXT:    v_mov_b32_e32 v7, s11
-; GFX6-NEXT:    s_lshr_b32 s10, s4, 20
-; GFX6-NEXT:    v_mov_b32_e32 v8, s12
-; GFX6-NEXT:    v_mov_b32_e32 v9, s13
-; GFX6-NEXT:    s_lshr_b32 s12, s4, 21
 ; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:384
+; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:240
+; GFX6-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224
+; GFX6-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:208
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:192
+; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:176
+; GFX6-NEXT:    s_waitcnt expcnt(1)
+; GFX6-NEXT:    v_mov_b32_e32 v6, s30
+; GFX6-NEXT:    v_mov_b32_e32 v7, s31
+; GFX6-NEXT:    v_mov_b32_e32 v8, s44
+; GFX6-NEXT:    v_mov_b32_e32 v9, s45
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v10, s6
-; GFX6-NEXT:    v_mov_b32_e32 v11, s7
-; GFX6-NEXT:    s_lshr_b32 s6, s4, 18
-; GFX6-NEXT:    v_mov_b32_e32 v12, s8
-; GFX6-NEXT:    v_mov_b32_e32 v13, s9
-; GFX6-NEXT:    s_lshr_b32 s8, s4, 19
-; GFX6-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:368
+; GFX6-NEXT:    v_mov_b32_e32 v6, s58
+; GFX6-NEXT:    v_mov_b32_e32 v7, s59
+; GFX6-NEXT:    v_mov_b32_e32 v8, s54
+; GFX6-NEXT:    v_mov_b32_e32 v9, s55
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:144
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v14, s50
-; GFX6-NEXT:    v_mov_b32_e32 v15, s51
-; GFX6-NEXT:    s_lshr_b32 s50, s4, 16
-; GFX6-NEXT:    v_mov_b32_e32 v16, s52
-; GFX6-NEXT:    v_mov_b32_e32 v17, s53
-; GFX6-NEXT:    s_lshr_b32 s52, s4, 17
-; GFX6-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:352
+; GFX6-NEXT:    v_mov_b32_e32 v6, s62
+; GFX6-NEXT:    v_mov_b32_e32 v7, s63
+; GFX6-NEXT:    v_mov_b32_e32 v8, s60
+; GFX6-NEXT:    v_mov_b32_e32 v9, s61
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:128
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v6, s46
-; GFX6-NEXT:    v_mov_b32_e32 v7, s47
-; GFX6-NEXT:    s_lshr_b32 s46, s4, 14
+; GFX6-NEXT:    v_mov_b32_e32 v6, s56
+; GFX6-NEXT:    v_mov_b32_e32 v7, s57
+; GFX6-NEXT:    v_mov_b32_e32 v8, s52
+; GFX6-NEXT:    v_mov_b32_e32 v9, s53
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v6, s50
+; GFX6-NEXT:    v_mov_b32_e32 v7, s51
 ; GFX6-NEXT:    v_mov_b32_e32 v8, s48
 ; GFX6-NEXT:    v_mov_b32_e32 v9, s49
-; GFX6-NEXT:    s_lshr_b32 s48, s4, 15
-; GFX6-NEXT:    s_bfe_i64 s[54:55], s[42:43], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[42:43], s[44:45], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:336
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:96
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v10, s42
-; GFX6-NEXT:    v_mov_b32_e32 v11, s43
-; GFX6-NEXT:    s_lshr_b32 s42, s4, 12
-; GFX6-NEXT:    v_mov_b32_e32 v12, s54
-; GFX6-NEXT:    v_mov_b32_e32 v13, s55
-; GFX6-NEXT:    s_lshr_b32 s44, s4, 13
-; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:320
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v14, s40
-; GFX6-NEXT:    v_mov_b32_e32 v15, s41
-; GFX6-NEXT:    s_lshr_b32 s40, s4, 10
-; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX6-NEXT:    v_mov_b32_e32 v6, s46
+; GFX6-NEXT:    v_mov_b32_e32 v7, s47
+; GFX6-NEXT:    s_bfe_i64 s[16:17], s[42:43], 0x10000
+; GFX6-NEXT:    s_bfe_i64 s[30:31], s[40:41], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT:    v_mov_b32_e32 v16, s36
-; GFX6-NEXT:    v_mov_b32_e32 v17, s37
-; GFX6-NEXT:    s_lshr_b32 s36, s4, 11
-; GFX6-NEXT:    v_mov_b32_e32 v2, s38
-; GFX6-NEXT:    v_mov_b32_e32 v3, s39
-; GFX6-NEXT:    s_lshr_b32 s38, s4, 8
+; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:304
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v6, s30
-; GFX6-NEXT:    v_mov_b32_e32 v7, s31
-; GFX6-NEXT:    s_lshr_b32 s30, s4, 9
-; GFX6-NEXT:    v_mov_b32_e32 v8, s34
-; GFX6-NEXT:    v_mov_b32_e32 v9, s35
-; GFX6-NEXT:    s_lshr_b32 s34, s4, 6
-; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:288
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v10, s26
-; GFX6-NEXT:    v_mov_b32_e32 v11, s27
-; GFX6-NEXT:    s_lshr_b32 s26, s4, 7
-; GFX6-NEXT:    v_mov_b32_e32 v12, s28
-; GFX6-NEXT:    v_mov_b32_e32 v13, s29
-; GFX6-NEXT:    s_lshr_b32 s28, s4, 4
+; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:272
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v14, s22
-; GFX6-NEXT:    v_mov_b32_e32 v15, s23
-; GFX6-NEXT:    s_lshr_b32 s22, s4, 5
-; GFX6-NEXT:    v_mov_b32_e32 v16, s24
-; GFX6-NEXT:    v_mov_b32_e32 v17, s25
-; GFX6-NEXT:    s_lshr_b32 s24, s4, 2
 ; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s20
-; GFX6-NEXT:    v_mov_b32_e32 v1, s21
-; GFX6-NEXT:    s_lshr_b32 s20, s4, 3
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
-; GFX6-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
 ; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:240
-; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:224
-; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:208
-; GFX6-NEXT:    v_mov_b32_e32 v2, s18
-; GFX6-NEXT:    v_mov_b32_e32 v3, s19
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s14
-; GFX6-NEXT:    v_mov_b32_e32 v1, s15
-; GFX6-NEXT:    v_mov_b32_e32 v2, s16
-; GFX6-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s10
-; GFX6-NEXT:    v_mov_b32_e32 v1, s11
-; GFX6-NEXT:    v_mov_b32_e32 v2, s12
-; GFX6-NEXT:    v_mov_b32_e32 v3, s13
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
-; GFX6-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s7
-; GFX6-NEXT:    v_mov_b32_e32 v2, s8
-; GFX6-NEXT:    v_mov_b32_e32 v3, s9
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s50
-; GFX6-NEXT:    v_mov_b32_e32 v1, s51
-; GFX6-NEXT:    v_mov_b32_e32 v2, s52
-; GFX6-NEXT:    v_mov_b32_e32 v3, s53
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GFX6-NEXT:    v_mov_b32_e32 v0, s8
+; GFX6-NEXT:    v_mov_b32_e32 v1, s9
+; GFX6-NEXT:    v_mov_b32_e32 v2, s10
+; GFX6-NEXT:    v_mov_b32_e32 v3, s11
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s46
-; GFX6-NEXT:    v_mov_b32_e32 v1, s47
-; GFX6-NEXT:    v_mov_b32_e32 v2, s48
-; GFX6-NEXT:    v_mov_b32_e32 v3, s49
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GFX6-NEXT:    v_mov_b32_e32 v0, s12
+; GFX6-NEXT:    v_mov_b32_e32 v1, s13
+; GFX6-NEXT:    v_mov_b32_e32 v2, s14
+; GFX6-NEXT:    v_mov_b32_e32 v3, s15
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s42
-; GFX6-NEXT:    v_mov_b32_e32 v1, s43
-; GFX6-NEXT:    v_mov_b32_e32 v2, s44
-; GFX6-NEXT:    v_mov_b32_e32 v3, s45
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_mov_b32_e32 v3, s5
+; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:448
+; GFX6-NEXT:    v_mov_b32_e32 v0, s18
+; GFX6-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s40
-; GFX6-NEXT:    v_mov_b32_e32 v1, s41
-; GFX6-NEXT:    v_mov_b32_e32 v2, s36
-; GFX6-NEXT:    v_mov_b32_e32 v3, s37
+; GFX6-NEXT:    v_mov_b32_e32 v2, s20
+; GFX6-NEXT:    v_mov_b32_e32 v3, s21
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s38
-; GFX6-NEXT:    v_mov_b32_e32 v1, s39
-; GFX6-NEXT:    v_mov_b32_e32 v2, s30
-; GFX6-NEXT:    v_mov_b32_e32 v3, s31
+; GFX6-NEXT:    v_mov_b32_e32 v0, s22
+; GFX6-NEXT:    v_mov_b32_e32 v1, s23
+; GFX6-NEXT:    v_mov_b32_e32 v2, s24
+; GFX6-NEXT:    v_mov_b32_e32 v3, s25
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s34
-; GFX6-NEXT:    v_mov_b32_e32 v1, s35
-; GFX6-NEXT:    v_mov_b32_e32 v2, s26
-; GFX6-NEXT:    v_mov_b32_e32 v3, s27
+; GFX6-NEXT:    v_mov_b32_e32 v0, s26
+; GFX6-NEXT:    v_mov_b32_e32 v1, s27
+; GFX6-NEXT:    v_mov_b32_e32 v2, s28
+; GFX6-NEXT:    v_mov_b32_e32 v3, s29
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s28
-; GFX6-NEXT:    v_mov_b32_e32 v1, s29
-; GFX6-NEXT:    v_mov_b32_e32 v2, s22
-; GFX6-NEXT:    v_mov_b32_e32 v3, s23
+; GFX6-NEXT:    v_mov_b32_e32 v0, s34
+; GFX6-NEXT:    v_mov_b32_e32 v1, s35
+; GFX6-NEXT:    v_mov_b32_e32 v2, s36
+; GFX6-NEXT:    v_mov_b32_e32 v3, s37
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, s24
-; GFX6-NEXT:    v_mov_b32_e32 v1, s25
-; GFX6-NEXT:    v_mov_b32_e32 v2, s20
-; GFX6-NEXT:    v_mov_b32_e32 v3, s21
+; GFX6-NEXT:    v_mov_b32_e32 v0, s38
+; GFX6-NEXT:    v_mov_b32_e32 v1, s39
+; GFX6-NEXT:    v_mov_b32_e32 v2, s30
+; GFX6-NEXT:    v_mov_b32_e32 v3, s31
 ; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NEXT:    v_mov_b32_e32 v6, s4
-; GFX6-NEXT:    v_mov_b32_e32 v7, s5
-; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GFX6-NEXT:    v_mov_b32_e32 v8, s16
+; GFX6-NEXT:    v_mov_b32_e32 v9, s17
+; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: constant_sextload_v64i1_to_v64i64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 5332da6827ec3f6..faf359c76671da3 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -1707,29 +1707,28 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s19
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s11
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s18
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s17
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s4
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
@@ -1931,29 +1930,27 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s19
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s11
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s18
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s17
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
@@ -2172,56 +2169,54 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s29
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s27
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s7
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s26
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[16:19], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s24
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[16:19], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s2
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s23
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s3
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[16:19], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s21
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s1
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s20
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s36
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s35
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[16:19], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s3
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s11
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[16:19], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s8
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[16:19], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[16:19], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
@@ -2580,58 +2575,56 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s13, 16
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s12, 16
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s15, 16
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s36, s14, 16
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s29
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s28
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s27
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s7
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s26
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s25
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s24
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s2
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s23
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s3
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s3
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s21
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s1
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s20
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s36
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s35
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[16:19], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s33
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[16:19], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
@@ -2978,22 +2971,22 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s2, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s41, s5, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s4, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s45, s7, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s6, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s47, s9, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s8, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s49, s11, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s10, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s51, s13, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s12, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s53, s15, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s54, s14, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s43, s7, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s44, s6, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s45, s9, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s8, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s47, s11, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s10, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s49, s13, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s12, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s51, s15, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s14, 16
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s39, s1, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s40, s0, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s43, s3, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s44, s2, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s53, s3, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s54, s2, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s55, s4, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
@@ -3004,19 +2997,19 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s55, s17, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s16, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s57, s19, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s18, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s59, s21, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s20, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s61, s23, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s22, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s63, s25, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s64, s24, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s65, s27, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s66, s26, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s67, s29, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s17, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s57, s16, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s19, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s59, s18, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s21, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s61, s20, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s23, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s63, s22, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s64, s25, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s65, s24, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s66, s27, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s67, s26, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s29, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s68, s28, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s69, s31, 16
 ; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s70, s30, 16
@@ -3027,111 +3020,110 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s21, s21, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s20, s20, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s23, s23, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s22, s22, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s25, s25, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s24, s24, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s27, s27, 0xffff
+; GCN-NOHSA-SI-NEXT:    s_and_b32 s26, s26, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s29, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s28, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s31, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s30, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s26, s26, 0xffff
-; GCN-NOHSA-SI-NEXT:    s_and_b32 s22, s22, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s28
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s26
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s27
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s24
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s25
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(3)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s22
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s62
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s61
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s67
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s66
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s65
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s63
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s62
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s61
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s21
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s18
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s59
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s19
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s58
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:144
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s60
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s59
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s57
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s17
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s56
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:128
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s58
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s57
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s52
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s51
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s50
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s49
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s54
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s53
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s11
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s47
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s46
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s45
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s44
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s7
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s43
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s55
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s42
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s41
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s54
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s38
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s53
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s35
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s42
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s41
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s40
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s39
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s33
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s26
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s31
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s44
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s43
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s70
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s69
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s68
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s25
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:240
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:192
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s23
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s20
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
@@ -3807,18 +3799,20 @@ define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s58, s2
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s59, s5, 16
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s60, s4, 16
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
-; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s61, s7, 16
-; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s62, s6, 16
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s61, s5
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s62, s4
+; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s7, 16
+; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s4, s6, 16
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s7
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
-; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s8, 16
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s64, s9
+; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s7, s9, 16
+; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s64, s8, 16
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s65, s11, 16
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s66, s10, 16
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
+; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s67, s13, 16
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s68, s12, 16
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
@@ -3827,104 +3821,100 @@ define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s70, s14, 16
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
 ; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
-; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
-; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s9, s9, 16
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s63
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s64
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s9
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(3)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s62
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s61
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s60
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s59
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s58
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s57
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s63
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s62
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s61
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s59
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s58
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s56
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s57
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s55
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s54
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s52
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s53
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s51
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(2)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s50
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s49
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s54
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s53
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s29
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s47
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s26
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s46
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s27
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s45
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s24
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s44
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s25
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s43
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s26
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s22
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s42
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s23
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s41
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s24
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s44
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s43
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s20
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s40
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s21
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s39
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s22
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s42
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s41
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s18
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s38
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s19
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s35
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s40
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s17
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s33
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s70
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s69
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s68
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s67
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s11
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s7
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
@@ -5446,30 +5436,31 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s7
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s15
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s13
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
@@ -5975,54 +5966,52 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s36
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s37
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s35
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s29
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[26:27], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s13
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s11
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s15
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s9
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s34
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s35
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s30
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s24
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s26
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s27
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s19
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s20
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s21
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s22
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s23
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s24
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s25
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
@@ -6373,68 +6362,68 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
 ; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:240
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:208
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:176
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:144
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s1
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s1
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:224
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:192
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:160
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:128
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s33
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s36
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
@@ -6917,146 +6906,149 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s15
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, s13
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s22, s11
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s30, s9
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s28, s7
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s36, s5
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s40, s3
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s44, s1
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s14, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s12, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s10, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s8, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s6, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s4, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s2, 16
-; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s0, 16
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[62:63], s[20:21], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[64:65], s[18:19], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[66:67], s[30:31], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[68:69], s[22:23], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[50:51], s[8:9], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s36, s15
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s38, s13
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s40, s11
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s44, s9
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s50, s7
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s46, s5
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s48, s3
+; GCN-NOHSA-SI-NEXT:    s_mov_b32 s52, s1
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s14, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s12, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s10, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s8, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s6, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s4, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s2, 16
+; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s0, 16
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[0:1], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[2:3], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[4:5], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[34:35], s[6:7], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[42:43], s[8:9], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[54:55], s[10:11], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[56:57], s[12:13], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[58:59], s[14:15], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[60:61], s[0:1], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[70:71], s[2:3], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[72:73], s[4:5], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[2:3], s[8:9], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[12:13], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[14:15], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
-; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[74:75], s[6:7], 48
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[64:65], s[12:13], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[66:67], s[14:15], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[68:69], s[0:1], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[2:3], s[2:3], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[70:71], s[4:5], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[72:73], s[6:7], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[8:9], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[10:11], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[12:13], 48
+; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[14:15], 48
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s64
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s65
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s62
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s63
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s72
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s73
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s70
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s71
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s3
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s68
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s69
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s66
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s67
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s2
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s3
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s66
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s67
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[2:3], s[62:63], 0x100000
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s2
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s3
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s64
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[60:61], 0x100000
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s65
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s13
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[50:51], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[58:59], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[56:57], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[50:51], s[52:53], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x100000
 ; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[52:53], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[48:49], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[46:47], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[42:43], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[38:39], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[34:35], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[26:27], 0x100000
-; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(3)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s74
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s75
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(1)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s54
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s55
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s17
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:160
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s72
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s73
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s42
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s43
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s15
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:128
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s41
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s70
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s71
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s36
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s37
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s11
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:240
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s44
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s45
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s60
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s61
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s34
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s35
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s58
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s59
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s56
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s57
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s54
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s55
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s50
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s51
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s30
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s31
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s25
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s46
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s29
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s22
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s16
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s17
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s49
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s20
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s14
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s15
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s26
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s27
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[24:25], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[22:23], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[20:21], 0x100000
+; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[18:19], 0x100000
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s50
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s51
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s18
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s19
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s12
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s13
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s10
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s11
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s38
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s39
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s8
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s9
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s40
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s41
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s6
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s7
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s44
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s45
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s4
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s17
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s15
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s6
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s7
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s4
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s12
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s13
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(3)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s10
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s11
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
index 87913841012184c..2094e5f205efe9f 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
@@ -2146,6 +2146,7 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %ou
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s3
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s0
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s1
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s12
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s18
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
@@ -2156,7 +2157,6 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %ou
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s14
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:16
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s12
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[8:11], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
@@ -2379,9 +2379,9 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s30, s11, 31
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s31, s10, 31
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s33, s13, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s15, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s14, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s36, s12, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s12, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s15, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s36, s14, 31
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s12
@@ -2390,39 +2390,41 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s11
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s28
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s27
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v24, s2
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v26, s3
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s35
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s26
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[16:19], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s24
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[16:19], 0 offset:32
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s2
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s3
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s22
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[16:19], 0 offset:16
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s1
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s34
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s33
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:96
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s31
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s30
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[16:19], 0 offset:80
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s28
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[16:19], 0 offset:64
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s27
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s26
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[16:19], 0 offset:48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s24
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[16:19], 0 offset:32
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s23
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v27, s22
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[24:27], off, s[16:19], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[16:19], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v16i32_to_v16i64:
@@ -3095,138 +3097,137 @@ define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(ptr addrspace(1) %
 define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
 ; GFX6-NOHSA-LABEL: constant_sextload_v32i32_to_v32i64:
 ; GFX6-NOHSA:       ; %bb.0:
-; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x9
+; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
+; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[16:31], s[2:3], 0x0
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s39, 0xf000
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s38, -1
-; GFX6-NOHSA-NEXT:    s_mov_b32 s36, s16
-; GFX6-NOHSA-NEXT:    s_mov_b32 s37, s17
-; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[16:31], s[18:19], 0x10
+; GFX6-NOHSA-NEXT:    s_mov_b32 s36, s0
+; GFX6-NOHSA-NEXT:    s_mov_b32 s37, s1
+; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[2:3], 0x10
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s33, s1, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s0, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s3, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s40, s2, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s41, s5, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s42, s4, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s43, s7, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s44, s6, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s45, s17, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s46, s16, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s47, s19, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s48, s18, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s49, s21, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s20, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s51, s23, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s52, s30, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s31, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s52, s28, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s53
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s29, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s52
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s52, s26, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s53
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s27, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s52
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s52, s22, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s53
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s25, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s28
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s26
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s27
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s24
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s22
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s23
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:240
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s17
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s17, s17, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s16, s16, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s19, s19, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s18, s18, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s21, s21, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s20, s20, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s23, s23, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s22, s22, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s25, s25, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s33, s24, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s27, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s26, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s40, s29, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s41, s28, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s42, s31, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s43, s30, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s44, s1, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s45, s0, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s46, s3, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s47, s2, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s48, s5, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s49, s4, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s7, 31
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s51, s6, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s51
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s51, s9, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s50
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s8, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s49
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s49, s11, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s48
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s48, s10, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[19:22], off, s[36:39], 0 offset:176
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v26, s47
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s6, s13, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v28, s46
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s7, s12, 31
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s21
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[36:39], 0 offset:224
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v24, s5
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[22:25], off, s[36:39], 0 offset:160
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s45
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s4, s15, 31
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s19
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[36:39], 0 offset:208
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s44
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s5, s14, 31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s2
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v27, s3
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[25:28], off, s[36:39], 0 offset:144
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s14
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s17
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s16, s24, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s17, s9, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s18, s8, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s19, s11, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s20, s10, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s21, s13, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s22, s12, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s23, s15, 31
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s24, s14, 31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s53
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[36:39], 0 offset:192
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s1
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[19:22], off, s[36:39], 0 offset:128
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s52
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s51
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[36:39], 0 offset:176
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s30
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s43
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s42
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[19:22], off, s[36:39], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s12
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s50
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s49
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:160
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s28
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s41
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s29
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s40
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[19:22], off, s[36:39], 0 offset:96
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s35
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s34
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[19:22], off, s[36:39], 0 offset:80
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s25
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:64
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s47
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[36:39], 0 offset:144
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s23
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[36:39], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s46
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s45
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[36:39], 0 offset:128
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s51
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s21
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[36:39], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s19
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[36:39], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v24, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v26, s4
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[23:26], off, s[36:39], 0 offset:240
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s7
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s24
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s23
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[36:39], 0 offset:112
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s22
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s21
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[36:39], 0 offset:96
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s2
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s3
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s19
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:80
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s17
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[36:39], 0 offset:64
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s44
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s43
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[36:39], 0 offset:48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s42
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s41
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[36:39], 0 offset:32
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s40
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s35
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[36:39], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s34
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s33
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s6
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[36:39], 0 offset:224
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s49
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0 offset:208
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s50
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[36:39], 0 offset:192
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[36:39], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v32i32_to_v32i64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index 66fc322e5e04b57..5290d1a2dc35a6c 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -1519,29 +1519,28 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_and_b32 s19, s7, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s14
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s12
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s17
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s16
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i32:
@@ -1743,29 +1742,27 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s7, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s17
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s14
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i32:
@@ -1992,56 +1989,54 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_and_b32 s34, s9, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s35, s10, 0xff
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s36, s11, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s26
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s18
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s30
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s16
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s29
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s14
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s16
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s28
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s26
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:112
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:80
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s33
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i32:
@@ -2395,58 +2390,56 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s30, s10, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s31, s10, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s33, s10, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s11, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s35, s11, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s36, s11, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s11, s11
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s35
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s24
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s21
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s18
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s15
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s35
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s34
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s33
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s30
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i32:
@@ -2806,35 +2799,35 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s1, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s21, s1, 0x80008
 ; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s2, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s23, s2, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s3, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s26, s3, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s27, s4, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s28, s4, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s29, s5, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s30, s5, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s31, s6, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s33, s6, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s34, s7, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s35, s7, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s36, s8, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s37, s8, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s38, s9, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s39, s9, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s40, s10, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s41, s10, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s42, s11, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s43, s11, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s44, s12, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s45, s12, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s46, s13, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s47, s13, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s48, s14, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s49, s14, 0x80008
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s50, s15, 24
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s51, s15, 0x80008
-; GFX6-NOHSA-NEXT:    s_and_b32 s52, s0, 0xff
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s25, s0, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s25, s2, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s27, s3, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s28, s3, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s29, s4, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s30, s4, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s31, s5, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s33, s5, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s34, s6, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s35, s6, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s36, s7, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s37, s7, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s38, s8, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s39, s8, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s40, s9, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s41, s9, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s42, s10, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s43, s10, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s23, s11, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s44, s11, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s45, s12, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s46, s12, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s47, s13, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s48, s13, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s49, s14, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s50, s14, 0x80008
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s51, s15, 24
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s52, s15, 0x80008
+; GFX6-NOHSA-NEXT:    s_and_b32 s26, s0, 0xff
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s24, s0, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s53, s1, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s54, s1, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s55, s2, 0xff
@@ -2856,111 +2849,112 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_and_b32 s65, s10, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s66, s11, 0xff
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s67, s12, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s68, s13, 0xff
+; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s69, s14, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
 ; GFX6-NOHSA-NEXT:    s_and_b32 s70, s15, 0xff
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s15, s15, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s16
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s17
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s70
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s51
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s50
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s69
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s49
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s51
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s50
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s49
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s68
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s47
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s46
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s67
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s45
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s12
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s44
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(3)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s66
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s43
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s42
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s46
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s45
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s43
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s42
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s41
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s40
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s65
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s41
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s40
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s63
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s39
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s38
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:128
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s64
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s39
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s38
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s62
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s37
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s36
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s63
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s36
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s61
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s35
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s34
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s62
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s35
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s60
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s33
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s31
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s61
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s31
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s59
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s30
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s29
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:64
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s60
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s30
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s29
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s57
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s28
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s58
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s27
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s59
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s28
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s55
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s56
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s22
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s57
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s58
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s53
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s54
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s20
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s55
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s56
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s44
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s70
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s18
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s53
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s54
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s69
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s14
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:224
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s52
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s68
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s13
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:208
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s67
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s66
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:192
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s65
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s10
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:160
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_zextload_v64i8_to_v64i32:
@@ -3597,41 +3591,43 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s4, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s36, s4, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s37, s4, 0x80008
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s38, s5, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s39, s5, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s40, s5, 0x80008
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s41, s6, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s42, s6, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s43, s6, 0x80008
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s44, s7, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s45, s7, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s46, s7, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s38, s4
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s39, s5, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s40, s5, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s41, s5, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s42, s5
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s43, s6, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s44, s6, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s45, s6, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s46, s6
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s47, s7, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s48, s7, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s49, s7, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s47, s8, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s48, s8, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s49, s8, 0x80008
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s8, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s51, s8, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s52, s8, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s8, s8
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s9, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s51, s9, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s52, s9, 0x80008
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s9, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s54, s9, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s55, s9, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s9, s9
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s10, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s54, s10, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s55, s10, 0x80008
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s56, s10, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s57, s10, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s58, s10, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s56, s11, 24
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s57, s11, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s58, s11, 0x80008
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s11, s11
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s59, s12, 0x80010
-; GFX6-NOHSA-NEXT:    s_bfe_i32 s60, s12, 0x80008
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s61, s12
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s59, s11, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s4, s11, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s5, s11, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s11
+; GFX6-NOHSA-NEXT:    s_ashr_i32 s11, s12, 24
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s60, s12, 0x80010
+; GFX6-NOHSA-NEXT:    s_bfe_i32 s61, s12, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s12, s12
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s62, s13, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s63, s13, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s64, s13, 0x80008
+; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s13, s13
 ; GFX6-NOHSA-NEXT:    s_ashr_i32 s65, s14, 24
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s66, s14, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s67, s14, 0x80008
@@ -3640,104 +3636,100 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s69, s15, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_i32 s70, s15, 0x80008
 ; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s15, s15
-; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s13, s13
-; GFX6-NOHSA-NEXT:    s_ashr_i32 s12, s12, 24
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s16
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s17
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s70
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s69
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s68
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s67
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s66
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s65
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s64
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s63
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s62
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s61
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s60
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s59
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s12
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(3)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s58
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s57
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s56
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s55
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s54
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s53
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s51
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s50
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s59
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s58
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s57
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s56
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s55
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s54
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s53
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s52
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s51
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:160
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s50
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s49
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s47
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s49
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s47
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s45
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s44
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s43
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s46
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s45
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s44
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s42
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s41
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s40
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s39
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s43
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s42
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s38
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s37
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s35
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s40
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s39
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s38
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s34
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s33
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s30
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s36
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s29
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s28
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s26
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s22
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s28
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s27
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s26
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s70
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s69
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s68
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s24
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s23
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s67
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s66
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s65
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s21
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s63
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s62
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s61
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s60
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s11
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v64i8_to_v64i32:
@@ -5312,32 +5304,32 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[4:5], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
 ; GFX6-NOHSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 56
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s9
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s9
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(1)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s12
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s17
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s15
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i64:
@@ -5831,80 +5823,82 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
 ; GFX6-NOHSA:       ; %bb.0:
 ; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[12:15], s[2:3], 0x0
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s7, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s7, 8
-; GFX6-NOHSA-NEXT:    s_mov_b32 s12, s7
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s6, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s6, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s6, 8
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s5, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s5, 8
-; GFX6-NOHSA-NEXT:    s_mov_b32 s24, s5
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s26, s4, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s4, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s4, 8
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[34:35], s[4:5], 0x80000
-; GFX6-NOHSA-NEXT:    s_ashr_i64 s[36:37], s[4:5], 56
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[38:39], s[6:7], 0x80000
-; GFX6-NOHSA-NEXT:    s_ashr_i64 s[40:41], s[6:7], 56
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s15, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s15, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s20, s15
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s4, s14, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s14, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s14, 8
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s13, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s13, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s24, s13
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s26, s12, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s12, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s12, 8
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[34:35], s[12:13], 0x80000
+; GFX6-NOHSA-NEXT:    s_ashr_i64 s[12:13], s[12:13], 56
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[36:37], s[14:15], 0x80000
+; GFX6-NOHSA-NEXT:    s_ashr_i64 s[14:15], s[14:15], 56
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[30:31], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s34
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s35
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[30:31], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[28:29], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s40
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s12
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s38
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s39
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s36
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s37
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s24
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s25
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s34
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s35
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s11
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(1)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s19
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s15
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s21
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s22
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s23
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s26
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s5
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s19
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s9
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s23
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(1)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s25
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s13
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i64:
@@ -6263,68 +6257,68 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
 ; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s21
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s23
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
@@ -6820,76 +6814,110 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
 ; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s46, s7, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s48, s7, 8
-; GFX6-NOHSA-NEXT:    s_mov_b32 s50, s7
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s6, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s6, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s6, 8
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s5, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s5, 8
-; GFX6-NOHSA-NEXT:    s_mov_b32 s44, s5
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s4, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s4, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s4, 8
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s26, s3, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s3, 8
-; GFX6-NOHSA-NEXT:    s_mov_b32 s40, s3
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s2, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s34, s2, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s36, s2, 8
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s38, s1, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s42, s1, 8
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[58:59], s[50:51], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[60:61], s[44:45], 0x80000
-; GFX6-NOHSA-NEXT:    s_mov_b32 s62, s1
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s56, s0, 16
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s64, s0, 24
-; GFX6-NOHSA-NEXT:    s_lshr_b32 s66, s0, 8
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[44:45], s[0:1], 0x80000
-; GFX6-NOHSA-NEXT:    s_ashr_i64 s[50:51], s[0:1], 56
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[52:53], s[2:3], 0x80000
-; GFX6-NOHSA-NEXT:    s_ashr_i64 s[68:69], s[4:5], 56
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[70:71], s[6:7], 0x80000
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s34, s7, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s36, s7, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s38, s7
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s40, s6, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s44, s6, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s46, s6, 8
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s66, s5, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s48, s5, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s50, s5
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s52, s4, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s54, s4, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s56, s4, 8
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s58, s3, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s60, s3, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s62, s3
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s64, s2, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s2, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s2, 8
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s1, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s1, 8
+; GFX6-NOHSA-NEXT:    s_mov_b32 s68, s1
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s0, 16
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s0, 24
+; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s0, 8
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[0:1], 0x80000
+; GFX6-NOHSA-NEXT:    s_ashr_i64 s[26:27], s[0:1], 56
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[2:3], 0x80000
+; GFX6-NOHSA-NEXT:    s_ashr_i64 s[30:31], s[2:3], 56
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[42:43], s[4:5], 0x80000
+; GFX6-NOHSA-NEXT:    s_ashr_i64 s[2:3], s[4:5], 56
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[6:7], 0x80000
 ; GFX6-NOHSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 56
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
-; GFX6-NOHSA-NEXT:    s_ashr_i64 s[54:55], s[2:3], 56
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s8
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s9
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s58
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s59
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s70
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s71
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s68
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s69
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s60
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s61
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[66:67], 0x80000
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s5
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s2
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s3
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[46:47], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[48:49], 0x80000
 ; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s5
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s7
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:240
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[46:47], s[62:63], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[48:49], s[40:41], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[66:67], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[64:65], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[56:57], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[40:41], s[42:43], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[66:67], s[68:69], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[46:47], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[44:45], 0x80000
+; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[40:41], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s5
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s50
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s51
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s49
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s52
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s53
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s54
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s55
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s42
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s43
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s56
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s57
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:128
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s58
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s59
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s30
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s31
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s62
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s63
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s60
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s61
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s64
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s65
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s28
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s29
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s26
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s27
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s66
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s67
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s24
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s25
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
@@ -6897,80 +6925,44 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
 ; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:224
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(1)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s12
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s13
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s54
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s55
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s14
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s15
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:192
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s49
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s52
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s53
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s18
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s19
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:160
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s20
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s21
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s22
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s23
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:144
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s50
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s51
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s24
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s25
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:128
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s47
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s26
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s27
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:112
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s44
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s45
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s28
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s29
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s30
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s31
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s35
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
 ; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s34
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s35
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s38
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s39
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s36
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s37
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s38
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s39
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:48
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(2)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s40
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s41
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s4
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s10
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s11
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:80
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s13
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:64
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s14
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s15
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s16
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s17
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(2)
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s5
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v24, s22
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s23
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i64:
@@ -8701,29 +8693,30 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
 ; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v3, s9, v3, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 8
 ; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s15, v0, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v1, s13, v1, 16
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s9, s14, 8
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s13, v1, 16
 ; GFX6-NOHSA-NEXT:    s_lshl_b32 s12, s12, 8
 ; GFX6-NOHSA-NEXT:    v_alignbit_b32 v2, s11, v2, 16
 ; GFX6-NOHSA-NEXT:    s_lshl_b32 s10, s10, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s9, v3, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 8
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
-; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s14
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v1
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v3
+; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s8
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v6, 0xff00ff, v0
+; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s9
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v4, 0xff00ff, v4
 ; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s12
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v2
 ; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s10
-; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s8
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v2
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i16:
@@ -9027,16 +9020,15 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_or_b32 s11, s18, s17
 ; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s19
 ; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s13
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s10
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
@@ -9372,59 +9364,60 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
 ; GFX6-NOHSA-NEXT:    s_and_b32 s3, s3, 0xff
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v3, s21, v3, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s2, s2, 0xff
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s20, s20, 8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s5
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s19, v4, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xff
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s18, s18, 8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s4
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v5, s17, v5, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s16, s16, 8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v6, s15, v6, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 8
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s6
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v8, s13, v7, 16
 ; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s27, v0, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s26, s26, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v1, s25, v1, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s24, s24, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v2, s23, v2, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s22, s22, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v8, s21, v3, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s20, s20, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s19, v4, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s18, s18, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v9, s17, v5, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s16, s16, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v6, s15, v6, 16
-; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 8
-; GFX6-NOHSA-NEXT:    v_alignbit_b32 v10, s13, v7, 16
 ; GFX6-NOHSA-NEXT:    s_lshl_b32 s12, s12, 8
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
-; GFX6-NOHSA-NEXT:    s_or_b32 s1, s1, s26
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v1
-; GFX6-NOHSA-NEXT:    s_or_b32 s0, s0, s24
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v2
-; GFX6-NOHSA-NEXT:    s_or_b32 s3, s3, s22
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v8
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v10, s27, v0, 16
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s13, s26, 8
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v12, s25, v1, 16
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s15, s24, 8
+; GFX6-NOHSA-NEXT:    v_alignbit_b32 v2, s23, v2, 16
+; GFX6-NOHSA-NEXT:    s_lshl_b32 s17, s22, 8
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v3
 ; GFX6-NOHSA-NEXT:    s_or_b32 s2, s2, s20
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v11, 0xff00ff, v4
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v4
 ; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s18
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v9, 0xff00ff, v9
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v5
 ; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s16
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v15, 0xff00ff, v6
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v11, 0xff00ff, v6
 ; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s14
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v9, 0xff00ff, v8
 ; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s12
-; GFX6-NOHSA-NEXT:    v_and_b32_e32 v13, 0xff00ff, v10
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s7
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[8:11], 0 offset:48
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s4
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s5
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:32
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s3
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v14, 0xff00ff, v10
+; GFX6-NOHSA-NEXT:    s_or_b32 s1, s1, s13
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v12, 0xff00ff, v12
+; GFX6-NOHSA-NEXT:    s_or_b32 s0, s0, s15
+; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v2
+; GFX6-NOHSA-NEXT:    s_or_b32 s2, s3, s17
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s7
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:48
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s5
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:32
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
+; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(2)
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s0
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s1
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[11:14], off, s[8:11], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
 ; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i16:
@@ -9985,17 +9978,11 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    s_or_b32 s16, s25, s24
 ; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s26
 ; GFX6-NOHSA-NEXT:    s_or_b32 s17, s28, s27
+; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s29
 ; GFX6-NOHSA-NEXT:    s_or_b32 s18, s31, s30
 ; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s33
 ; GFX6-NOHSA-NEXT:    s_or_b32 s19, s35, s34
 ; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s36
-; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s29
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
-; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
-; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
-; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
@@ -10012,6 +9999,11 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s19
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
+; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s18
+; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:48
 ; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; GFX6-NOHSA-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 7c80a220b72d7b0..5e126d692238adf 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -2641,13 +2641,13 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v32, 0xffff, v12
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
@@ -3052,13 +3052,13 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v32, v12, 0, 16
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
@@ -3460,8 +3460,8 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[27:30], off, s[8:11], 0 offset:64
@@ -3469,106 +3469,105 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(7)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v15
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v16, 16, v14
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v13
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v12
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v19
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v18
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v22, 16, v17
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(6)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v26, 16, v11
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v10
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v17, 0xffff, v15
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v15, 0xffff, v14
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v15, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v26, 16, v15
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v14
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v19
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v18
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v8, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v16, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v17, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v18, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, 0xffff, v13
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v12
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v18, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v9, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v10, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v11, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v21, 0xffff, v17
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, 0xffff, v16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v19, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(2)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v9
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v8
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v25, 0xffff, v11
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v23, 0xffff, v10
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v9
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v8
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v15, 16, v7
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v6
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v46, 16, v5
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v4
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v14, 0xffff, v7
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v6
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v45, 0xffff, v5
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v43, 0xffff, v4
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v11, 16, v3
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v50, 16, v1
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v0
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v10, 0xffff, v3
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v2
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v49, 0xffff, v1
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v47, 0xffff, v0
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v54, 16, v30
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v29
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v58, 16, v28
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v56, 16, v27
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v53, 0xffff, v30
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v51, 0xffff, v29
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v57, 0xffff, v28
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v55, 0xffff, v27
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v22, 16, v13
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v12
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v25, 0xffff, v15
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v23, 0xffff, v14
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v21, 0xffff, v13
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, 0xffff, v12
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v46, 16, v7
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v6
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v50, 16, v5
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v4
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v45, 0xffff, v7
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v43, 0xffff, v6
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v49, 0xffff, v5
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v47, 0xffff, v4
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v54, 16, v1
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v0
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v5, 0xffff, v3
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v3, 0xffff, v2
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v53, 0xffff, v1
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v51, 0xffff, v0
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v58, 16, v30
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v56, 16, v29
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v62, 16, v28
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v60, 16, v27
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v57, 0xffff, v30
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v55, 0xffff, v29
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v61, 0xffff, v28
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v59, 0xffff, v27
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v30, 16, v34
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v28, 16, v33
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v62, 16, v32
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v60, 16, v31
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v32
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v16, 16, v31
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v29, 0xffff, v34
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v27, 0xffff, v33
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v61, 0xffff, v32
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v59, 0xffff, v31
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v34, 16, v38
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v37
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v36
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v35
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v33, 0xffff, v38
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v31, 0xffff, v37
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v6, 0xffff, v36
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v35
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v17, 0xffff, v32
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v15, 0xffff, v31
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v38
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v12, 16, v37
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v34, 16, v36
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v35
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v13, 0xffff, v38
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v11, 0xffff, v37
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v33, 0xffff, v36
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v31, 0xffff, v35
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v38, 16, v42
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v41
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v40
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v39
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v40
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v8, 16, v39
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v37, 0xffff, v42
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v35, 0xffff, v41
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v2, 0xffff, v40
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v39
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v9, 0xffff, v40
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v7, 0xffff, v39
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(4)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:240
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:208
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
@@ -4276,14 +4275,14 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s7
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s2
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, s3
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[40:43], off, s[4:7], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[40:43], off, s[4:7], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[44:47], off, s[4:7], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v11
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v10
@@ -4294,82 +4293,94 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v9
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v8
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v9, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v8, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v35
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v34
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v35, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v34, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v15, 16, v33
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 16, v32
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v14, v33, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v32, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v35, 16, v39
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v33, 16, v38
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v34, v39, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v32, v38, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v47, 16, v37
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v45, 16, v36
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v46, v37, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v44, v36, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v39, 16, v43
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v37, 16, v42
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v38, v43, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v36, v42, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v51, 16, v41
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v49, 16, v40
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v50, v41, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v48, v40, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v43, 16, v31
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v41, 16, v30
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v42, v31, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v40, v30, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v55, 16, v29
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v53, 16, v28
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v54, v29, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v52, v28, 0, 16
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v9
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v8
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v9, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v8, 0, 16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0 offset:20 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[8:11], 0 offset:24 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[8:11], 0 offset:28 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[8:11], 0 offset:32 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v11, 16, v19
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 16, v18
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v10, v19, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v18, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v15, 16, v17
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 16, v16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v14, v17, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v16, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 16, v43
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v17, 16, v42
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v43, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v16, v42, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 16, v41
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 16, v40
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v41, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v40, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v43, 16, v47
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v41, 16, v46
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v42, v47, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v40, v46, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v48, 16, v45
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v46, 16, v44
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v47, v45, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v45, v44, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v52, 16, v39
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v50, 16, v38
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v51, v39, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v49, v38, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v56, 16, v37
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v54, 16, v36
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v55, v37, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v53, v36, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v38, 16, v35
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v36, 16, v34
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v37, v35, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v35, v34, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v60, 16, v33
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v58, 16, v32
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v59, v33, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v57, v32, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v34, 16, v31
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v32, 16, v30
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v33, v31, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v31, v30, 0, 16
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v29
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v28
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v29, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v28, 0, 16
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v30, 16, v27
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v28, 16, v26
 ; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v29, v27, 0, 16
 ; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v27, v26, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v59, 16, v25
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v57, 16, v24
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v58, v25, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v56, v24, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v26, 16, v23
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v24, 16, v22
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v25, v23, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v23, v22, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v63, 16, v21
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v61, 16, v20
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v62, v21, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v60, v20, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v22, 16, v19
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 16, v18
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v21, v19, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v18, 0, 16
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v3, 16, v17
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v2, v17, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v16, 0, 16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v7, 16, v25
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 16, v24
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v6, v25, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v24, 0, 16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:240
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[57:60], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[53:56], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[49:52], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:20 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[8:11], 0 offset:24 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[8:11], 0 offset:28 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[8:11], 0 offset:32 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
@@ -6481,48 +6492,49 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v2
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v0
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v0
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v2
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v1
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v3
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, 0
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v22, 16, v5
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v26, 16, v6
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, 0xffff, v6
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v30, 16, v7
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v28, 0xffff, v7
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, 0xffff, v5
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v29, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v31, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v21
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v21
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v4
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v2
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v0
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v23, 0xffff, v0
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v19, 0xffff, v2
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v1
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v3
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v6
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v4
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v7
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v7
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v29, 16, v5
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v27, 0xffff, v5
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v28, 0
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, v28
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v30, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v28
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v28
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v28
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64:
@@ -7243,133 +7255,108 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
 define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64:
 ; GCN-NOHSA-SI:       ; %bb.0:
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, -1
-; GCN-NOHSA-SI-NEXT:    s_mov_b32 s15, 0xe8f000
-; GCN-NOHSA-SI-NEXT:    s_add_u32 s12, s12, s3
-; GCN-NOHSA-SI-NEXT:    s_addc_u32 s13, s13, 0
 ; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v39, 0
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s2
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s3
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[14:17], off, s[8:11], 0
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[18:21], off, s[8:11], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[22:25], off, s[8:11], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[26:29], off, s[8:11], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v15
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v17
-; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v20
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v16
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v14
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v14
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v12, 0xffff, v16
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, v3
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, 0xffff, v15
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v34, 0xffff, v17
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v10, 16, v18
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v8, 0xffff, v18
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v20
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, v5
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v19
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v16, 0xffff, v19
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v44, 16, v21
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v42, 0xffff, v21
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v48, 16, v22
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v46, 0xffff, v22
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v22, 16, v24
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v20, 0xffff, v24
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v52, 16, v23
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v50, 0xffff, v23
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v56, 16, v25
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v54, 0xffff, v25
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v40, 16, v29
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v60, 16, v26
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v58, 0xffff, v26
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v26, 16, v28
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v24, 0xffff, v28
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(1)
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v27
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v27
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v38, 0xffff, v29
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v41, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v39
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v55, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v57, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v51, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v53, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v43, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v45, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v35, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v37, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v31, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v33, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v59, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v61, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v47, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v49, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, v39
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v12, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[3:6], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[11:14], off, s[8:11], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v4
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v0, 0xffff, v3
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v7, 0xffff, v5
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v15, 0xffff, v4
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v6
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v18, 0xffff, v6
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v13, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v14, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    buffer_store_dword v15, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v12, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v13, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v14, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v15, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v11
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v22, 0xffff, v11
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v27, 16, v13
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v25, 0xffff, v13
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v12
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, 0xffff, v12
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v13, 16, v14
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v11, 0xffff, v14
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[32:35], off, s[8:11], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v38, 16, v28
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v36, 0xffff, v28
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v41, 16, v30
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v39, 0xffff, v30
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v45, 16, v29
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v43, 0xffff, v29
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v49, 16, v31
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v47, 0xffff, v31
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v39
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, v39
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v31, 16, v32
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v29, 0xffff, v32
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v53, 16, v34
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v51, 0xffff, v34
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v57, 16, v33
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v55, 0xffff, v33
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v60, 0
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v61, 16, v35
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v59, 0xffff, v35
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, v60
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload
-; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, v60
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v40, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v42, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:160
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v37, v60
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v39, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:128
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v28, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:96
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v60
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:64
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v44, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v46, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v60
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v62, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v56, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v58, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v48, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v50, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v52, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v54, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v30, v60
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v32, v60
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:240
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[55:58], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[29:32], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64:
@@ -7973,106 +7960,108 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s6
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s7
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s4
 ; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s5
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0
+; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(3)
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v20, 16, v10
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v19, 16, v8
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v18, 16, v2
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, v3
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, v7
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, v11
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, v15
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v18, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[2:3], 48
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:240
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[0:1], 48
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v1, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v16, 16, v6
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v20, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v10, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 31, v22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:224
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v24, 16, v4
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v22, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[6:7], 48
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v21, v19, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v8, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v22, 31, v21
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:192
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v25, 16, v14
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[4:5], 48
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v5, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v18, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v2, 0, 16
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v22, 16, v10
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:160
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v23, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[10:11], 48
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v21, 16, v12
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v17, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v0, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[20:21], v[8:9], 48
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v18, v9, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v8
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v17, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v12, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v21, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v16, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v6, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 31, v22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[19:20], v[14:15], 48
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v14, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v22, v25, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v25, v24, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v23, v4, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v26, 31, v25
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[19:20], v[12:13], 48
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v13, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:16
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v14
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[26:27], v[12:13], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v24, v13, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, v15
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[14:15], v[14:15], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v12, v12, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v23, 31, v22
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v16, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v2, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:224
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v12
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v3, v1, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v1, v12, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v5, v14, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v7, v7, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v11, v9, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v9, v8, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v13, v10, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[22:23], v[8:9], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v20, v9, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, v11
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[10:11], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v8, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v15, v22, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v19, v24, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v17, v4, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v21, v6, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v23, v2, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v25, v0, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v27, v2, 0, 16
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[10:11], v[4:5], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v8, v5, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, v7
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[6:7], v[6:7], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v4, v4, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[30:31], v[0:1], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v28, v1, 0, 16
+; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, v3
+; GCN-NOHSA-SI-NEXT:    v_ashr_i64 v[2:3], v[2:3], 48
+; GCN-NOHSA-SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v22, 31, v21
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v26, 31, v25
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
+; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v24, 31, v23
-; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v28, 31, v27
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:192
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:160
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32
-; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[1:4], off, s[0:3], 0
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v25, 31, v24
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v29, 31, v28
+; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:144
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_endpgm
 ;
 ; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
index 55f0773f7e05aea..5b11a1fb2222a51 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
@@ -2343,24 +2343,24 @@ define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %ou
 ; SI-NOHSA-NEXT:    v_mov_b32_e32 v4, v10
 ; SI-NOHSA-NEXT:    v_mov_b32_e32 v6, v11
 ; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v15
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v14
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v35, 31, v13
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v33, 31, v12
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v32, v12
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v34, v13
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v8, v14
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v10, v15
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v13
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v12
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v8, v12
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v10, v13
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v35, 31, v15
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v33, 31, v14
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v32, v14
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v34, v15
 ; SI-NOHSA-NEXT:    s_mov_b32 s0, s4
 ; SI-NOHSA-NEXT:    s_mov_b32 s1, s5
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:64
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:16
 ; SI-NOHSA-NEXT:    s_endpgm
 ;
 ; GCNX3-HSA-LABEL: global_sextload_v16i32_to_v16i64:
@@ -2991,122 +2991,115 @@ define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %ou
 define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-NOHSA-LABEL: global_sextload_v32i32_to_v32i64:
 ; SI-NOHSA:       ; %bb.0:
-; SI-NOHSA-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; SI-NOHSA-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; SI-NOHSA-NEXT:    s_mov_b32 s14, -1
-; SI-NOHSA-NEXT:    s_mov_b32 s15, 0xe8f000
-; SI-NOHSA-NEXT:    s_add_u32 s12, s12, s3
-; SI-NOHSA-NEXT:    s_addc_u32 s13, s13, 0
-; SI-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; SI-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
 ; SI-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NOHSA-NEXT:    s_mov_b32 s2, -1
-; SI-NOHSA-NEXT:    s_mov_b32 s10, s2
-; SI-NOHSA-NEXT:    s_mov_b32 s11, s3
+; SI-NOHSA-NEXT:    s_mov_b32 s6, s2
+; SI-NOHSA-NEXT:    s_mov_b32 s7, s3
 ; SI-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NOHSA-NEXT:    s_mov_b32 s8, s6
-; SI-NOHSA-NEXT:    s_mov_b32 s9, s7
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:96
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:112
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:64
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:48
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:16
-; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0
-; SI-NOHSA-NEXT:    s_waitcnt vmcnt(7)
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v47, 31, v31
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v45, 31, v30
-; SI-NOHSA-NEXT:    s_waitcnt vmcnt(6)
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v39, 31, v15
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v37, 31, v14
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v43, 31, v13
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v41, 31, v12
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v40, v12
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v42, v13
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v36, v14
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v38, v15
+; SI-NOHSA-NEXT:    s_mov_b32 s4, s10
+; SI-NOHSA-NEXT:    s_mov_b32 s5, s11
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:96
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:112
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:64
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(3)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v19
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v18
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(2)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v7, 31, v15
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v14
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v11, 31, v13
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v9, 31, v12
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v8, v12
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v10, v13
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v4, v14
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v6, v15
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v15, 31, v17
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v13, 31, v16
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v12, v16
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v14, v17
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v0, v18
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v2, v19
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(1)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v35, 31, v23
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v33, 31, v22
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v19, 31, v21
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v20
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v16, v20
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v18, v21
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v32, v22
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v34, v23
+; SI-NOHSA-NEXT:    s_mov_b32 s0, s8
+; SI-NOHSA-NEXT:    s_mov_b32 s1, s9
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:16
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:32
+; SI-NOHSA-NEXT:    buffer_load_dwordx4 v[40:43], off, s[4:7], 0 offset:48
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:176
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(5) expcnt(0)
 ; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v35, 31, v29
 ; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v33, 31, v28
 ; SI-NOHSA-NEXT:    v_mov_b32_e32 v32, v28
 ; SI-NOHSA-NEXT:    v_mov_b32_e32 v34, v29
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v44, v30
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v46, v31
-; SI-NOHSA-NEXT:    buffer_store_dword v44, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
-; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
-; SI-NOHSA-NEXT:    buffer_store_dword v45, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
-; SI-NOHSA-NEXT:    buffer_store_dword v46, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
-; SI-NOHSA-NEXT:    buffer_store_dword v47, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v15, 31, v7
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v13, 31, v6
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:128
 ; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v47, 31, v5
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v45, 31, v4
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v44, v4
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v46, v5
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v12, v6
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v14, v7
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v51, 31, v1
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v49, 31, v0
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v48, v0
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v50, v1
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v4, v2
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v6, v3
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v19
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v18
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v55, 31, v17
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v53, 31, v16
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v52, v16
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v54, v17
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v0, v18
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v2, v19
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v19, 31, v23
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v17, 31, v22
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v59, 31, v21
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v57, 31, v20
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v56, v20
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v58, v21
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v16, v22
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v18, v23
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v23, 31, v27
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v21, 31, v26
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v63, 31, v25
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v61, 31, v24
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v60, v24
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v62, v25
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v20, v26
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v22, v27
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v27, 31, v11
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v25, 31, v10
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v9
-; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v8
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v8
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v9
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v24, v10
-; SI-NOHSA-NEXT:    v_mov_b32_e32 v26, v11
-; SI-NOHSA-NEXT:    s_mov_b32 s0, s4
-; SI-NOHSA-NEXT:    s_mov_b32 s1, s5
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:224
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:240
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:192
-; SI-NOHSA-NEXT:    buffer_load_dword v8, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; SI-NOHSA-NEXT:    buffer_load_dword v9, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
-; SI-NOHSA-NEXT:    buffer_load_dword v10, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; SI-NOHSA-NEXT:    buffer_load_dword v11, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
-; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:160
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:128
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:144
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:96
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:64
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:32
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48
-; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v35, 31, v31
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v33, 31, v30
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v32, v30
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v34, v31
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:144
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(3)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v41
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v40
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v40
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v41
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:96
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v43
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v42
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v42
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v43
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v37
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v36
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v36
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v37
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v39
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v38
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v38
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v39
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v25
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v24
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v24
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v25
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v31, 31, v27
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v29, 31, v26
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v28, v26
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v30, v27
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:48
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v27, 31, v21
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v25, 31, v20
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v24, v20
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v26, v21
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0
+; SI-NOHSA-NEXT:    s_waitcnt expcnt(0)
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v27, 31, v23
+; SI-NOHSA-NEXT:    v_ashrrev_i32_e32 v25, 31, v22
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v24, v22
+; SI-NOHSA-NEXT:    v_mov_b32_e32 v26, v23
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
+; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:160
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:16
 ; SI-NOHSA-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
index ccc36530c7957bd..ccdcfa9435d4ee4 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll
@@ -1854,87 +1854,87 @@ define void @caller_mix() {
   ; 32BIT-NEXT:   renamable $r7 = LIS 16313
   ; 32BIT-NEXT:   renamable $r8 = LIS 16329
   ; 32BIT-NEXT:   renamable $r9 = LIS 13107
-  ; 32BIT-NEXT:   renamable $r10 = LIS 16339
   ; 32BIT-NEXT:   STW renamable $r3, 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r11 = LIS 16345
+  ; 32BIT-NEXT:   renamable $r10 = LIS 16339
   ; 32BIT-NEXT:   STW killed renamable $r4, 88, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r4 = LIS 16355
+  ; 32BIT-NEXT:   renamable $r4 = LIS 16345
   ; 32BIT-NEXT:   STW killed renamable $r3, 132, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r3 = LIS 26214
+  ; 32BIT-NEXT:   renamable $r3 = LIS 16355
   ; 32BIT-NEXT:   STW killed renamable $r5, 128, $r1 :: (store (s32), align 8)
   ; 32BIT-NEXT:   renamable $r5 = ORI killed renamable $r6, 39322
+  ; 32BIT-NEXT:   STW renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r6 = LIS 26214
+  ; 32BIT-NEXT:   STW renamable $r5, 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r9 = ORI killed renamable $r9, 13107
+  ; 32BIT-NEXT:   STW renamable $r9, 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r6 = ORI killed renamable $r6, 26214
+  ; 32BIT-NEXT:   STW renamable $r6, 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r11 = LIS 16358
   ; 32BIT-NEXT:   STW renamable $r5, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r6 = ORI killed renamable $r7, 39321
-  ; 32BIT-NEXT:   STW killed renamable $r6, 56, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r6 = LIS 16358
+  ; 32BIT-NEXT:   renamable $r7 = ORI killed renamable $r7, 39321
+  ; 32BIT-NEXT:   STW killed renamable $r7, 56, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r7 = LIS 16361
   ; 32BIT-NEXT:   STW renamable $r5, 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r7 = ORI killed renamable $r8, 39321
-  ; 32BIT-NEXT:   STW killed renamable $r7, 64, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r7 = ORI killed renamable $r9, 13107
-  ; 32BIT-NEXT:   STW renamable $r7, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r8 = ORI killed renamable $r10, 13107
-  ; 32BIT-NEXT:   STW killed renamable $r8, 72, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r8 = LIS 16361
-  ; 32BIT-NEXT:   STW renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r9 = ORI killed renamable $r11, 39321
-  ; 32BIT-NEXT:   STW killed renamable $r9, 80, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r9 = LIS 52428
-  ; 32BIT-NEXT:   STW renamable $r7, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 13107
-  ; 32BIT-NEXT:   STW killed renamable $r4, 96, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r3 = ORI killed renamable $r3, 26214
-  ; 32BIT-NEXT:   STW renamable $r3, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r6, 26214
-  ; 32BIT-NEXT:   STW killed renamable $r4, 104, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r4 = LIS 16364
-  ; 32BIT-NEXT:   STW renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r6 = ORI killed renamable $r8, 39321
-  ; 32BIT-NEXT:   STW killed renamable $r6, 112, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r6 = ORI killed renamable $r9, 52429
-  ; 32BIT-NEXT:   STW renamable $r6, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 52428
-  ; 32BIT-NEXT:   STW killed renamable $r4, 120, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r8 = ORI killed renamable $r8, 39321
+  ; 32BIT-NEXT:   STW killed renamable $r8, 64, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r8 = LIS 52428
+  ; 32BIT-NEXT:   STW renamable $r9, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r10 = ORI killed renamable $r10, 13107
+  ; 32BIT-NEXT:   STW killed renamable $r10, 72, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r10 = LIS 16364
+  ; 32BIT-NEXT:   STW killed renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 39321
+  ; 32BIT-NEXT:   STW killed renamable $r4, 80, $r1 :: (store (s32), align 8)
   ; 32BIT-NEXT:   renamable $r4 = LIS 16369
-  ; 32BIT-NEXT:   STW killed renamable $r5, 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   STW killed renamable $r9, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r3 = ORI killed renamable $r3, 13107
+  ; 32BIT-NEXT:   STW killed renamable $r3, 96, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r3 = LIS 16371
+  ; 32BIT-NEXT:   STW killed renamable $r6, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r5 = ORI killed renamable $r11, 26214
+  ; 32BIT-NEXT:   STW killed renamable $r5, 104, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r5 = ORI killed renamable $r7, 39321
+  ; 32BIT-NEXT:   STW killed renamable $r5, 112, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r5 = ORI killed renamable $r8, 52429
+  ; 32BIT-NEXT:   STW renamable $r5, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r6 = ORI killed renamable $r10, 52428
+  ; 32BIT-NEXT:   STW killed renamable $r6, 120, $r1 :: (store (s32), align 8)
   ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 39321
   ; 32BIT-NEXT:   STW killed renamable $r4, 136, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r4 = LIS 16371
-  ; 32BIT-NEXT:   STW killed renamable $r7, 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 13107
-  ; 32BIT-NEXT:   STW killed renamable $r4, 144, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r4 = LIS 16372
-  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   STW killed renamable $r6, 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
-  ; 32BIT-NEXT:   renamable $r4 = ORI killed renamable $r4, 52428
-  ; 32BIT-NEXT:   STW killed renamable $r4, 152, $r1 :: (store (s32), align 8)
-  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.1, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   STW killed renamable $r3, 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r3 = ORI killed renamable $r3, 13107
+  ; 32BIT-NEXT:   STW killed renamable $r3, 144, $r1 :: (store (s32), align 8)
+  ; 32BIT-NEXT:   renamable $r3 = LIS 16372
+  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $r6 = LWZtoc %const.1, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   STW killed renamable $r5, 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8)
+  ; 32BIT-NEXT:   renamable $r3 = ORI killed renamable $r3, 52428
+  ; 32BIT-NEXT:   STW killed renamable $r3, 152, $r1 :: (store (s32), align 8)
   ; 32BIT-NEXT:   renamable $r3 = LWZtoc %const.2, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f1 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.3, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f2 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.4, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f1 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.3, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f2 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.4, $r2 :: (load (s32) from got)
   ; 32BIT-NEXT:   renamable $f3 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool)
   ; 32BIT-NEXT:   renamable $r3 = LWZtoc %const.5, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f4 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.6, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f6 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.7, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f4 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.6, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f6 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.7, $r2 :: (load (s32) from got)
   ; 32BIT-NEXT:   renamable $f7 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool)
   ; 32BIT-NEXT:   renamable $r3 = LWZtoc %const.8, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f8 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.9, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f9 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.10, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f8 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.9, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f9 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.10, $r2 :: (load (s32) from got)
   ; 32BIT-NEXT:   renamable $f11 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool)
   ; 32BIT-NEXT:   renamable $r3 = LWZtoc %const.11, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f12 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r5 = LWZtoc %const.12, $r2 :: (load (s32) from got)
-  ; 32BIT-NEXT:   renamable $f13 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r4 = LIS 16374
+  ; 32BIT-NEXT:   renamable $f12 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r4 = LWZtoc %const.12, $r2 :: (load (s32) from got)
+  ; 32BIT-NEXT:   renamable $f13 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r5 = LIS 16374
   ; 32BIT-NEXT:   renamable $f5 = LFS 0, killed renamable $r3 :: (load (s32) from constant-pool)
-  ; 32BIT-NEXT:   renamable $r11 = ORI killed renamable $r4, 26214
-  ; 32BIT-NEXT:   renamable $f10 = LFS 0, killed renamable $r5 :: (load (s32) from constant-pool)
+  ; 32BIT-NEXT:   renamable $r11 = ORI killed renamable $r5, 26214
+  ; 32BIT-NEXT:   renamable $f10 = LFS 0, killed renamable $r4 :: (load (s32) from constant-pool)
   ; 32BIT-NEXT:   $r3 = LI 1
   ; 32BIT-NEXT:   $r4 = LI 2
   ; 32BIT-NEXT:   $r5 = LI 3
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 78d60f06c06786b..f0f53f9359b43f6 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1971,44 +1971,58 @@ define void @caller_mix() {
 ; ASM32PWR4-NEXT:    li 3, 0
 ; ASM32PWR4-NEXT:    stw 0, 184(1)
 ; ASM32PWR4-NEXT:    lis 4, 16352
-; ASM32PWR4-NEXT:    lis 5, 16339
-; ASM32PWR4-NEXT:    lis 6, 16364
+; ASM32PWR4-NEXT:    lis 5, 16313
+; ASM32PWR4-NEXT:    li 6, 4
 ; ASM32PWR4-NEXT:    stw 3, 92(1)
-; ASM32PWR4-NEXT:    ori 5, 5, 13107
-; ASM32PWR4-NEXT:    ori 6, 6, 52428
+; ASM32PWR4-NEXT:    ori 5, 5, 39321
+; ASM32PWR4-NEXT:    li 8, 6
 ; ASM32PWR4-NEXT:    stw 3, 132(1)
 ; ASM32PWR4-NEXT:    lis 3, 16368
-; ASM32PWR4-NEXT:    li 8, 6
 ; ASM32PWR4-NEXT:    li 9, 7
 ; ASM32PWR4-NEXT:    li 10, 8
 ; ASM32PWR4-NEXT:    stw 3, 128(1)
 ; ASM32PWR4-NEXT:    lis 3, -26215
 ; ASM32PWR4-NEXT:    ori 3, 3, 39322
-; ASM32PWR4-NEXT:    stw 4, 88(1)
-; ASM32PWR4-NEXT:    lis 4, 16313
-; ASM32PWR4-NEXT:    ori 4, 4, 39321
+; ASM32PWR4-NEXT:    stw 3, 116(1)
+; ASM32PWR4-NEXT:    stw 3, 140(1)
 ; ASM32PWR4-NEXT:    stw 3, 60(1)
 ; ASM32PWR4-NEXT:    stw 3, 68(1)
 ; ASM32PWR4-NEXT:    stw 3, 84(1)
-; ASM32PWR4-NEXT:    stw 3, 116(1)
-; ASM32PWR4-NEXT:    stw 3, 140(1)
-; ASM32PWR4-NEXT:    lis 3, 16369
+; ASM32PWR4-NEXT:    lis 3, 16345
 ; ASM32PWR4-NEXT:    ori 3, 3, 39321
-; ASM32PWR4-NEXT:    stw 4, 56(1)
-; ASM32PWR4-NEXT:    lis 4, 16329
-; ASM32PWR4-NEXT:    ori 4, 4, 39321
-; ASM32PWR4-NEXT:    stw 3, 136(1)
-; ASM32PWR4-NEXT:    lis 3, 16371
+; ASM32PWR4-NEXT:    stw 3, 80(1)
+; ASM32PWR4-NEXT:    lis 3, 16355
 ; ASM32PWR4-NEXT:    ori 3, 3, 13107
-; ASM32PWR4-NEXT:    stw 4, 64(1)
+; ASM32PWR4-NEXT:    stw 3, 96(1)
+; ASM32PWR4-NEXT:    lis 3, 26214
+; ASM32PWR4-NEXT:    ori 7, 3, 26214
+; ASM32PWR4-NEXT:    lis 3, 16358
+; ASM32PWR4-NEXT:    stw 4, 88(1)
 ; ASM32PWR4-NEXT:    lis 4, 13107
 ; ASM32PWR4-NEXT:    ori 4, 4, 13107
-; ASM32PWR4-NEXT:    stw 3, 144(1)
-; ASM32PWR4-NEXT:    lis 3, 16372
-; ASM32PWR4-NEXT:    ori 3, 3, 52428
+; ASM32PWR4-NEXT:    ori 3, 3, 26214
+; ASM32PWR4-NEXT:    stw 4, 148(1)
 ; ASM32PWR4-NEXT:    stw 4, 76(1)
 ; ASM32PWR4-NEXT:    stw 4, 100(1)
-; ASM32PWR4-NEXT:    stw 4, 148(1)
+; ASM32PWR4-NEXT:    lis 4, 16364
+; ASM32PWR4-NEXT:    ori 4, 4, 52428
+; ASM32PWR4-NEXT:    stw 3, 104(1)
+; ASM32PWR4-NEXT:    lis 3, 16361
+; ASM32PWR4-NEXT:    ori 3, 3, 39321
+; ASM32PWR4-NEXT:    stw 3, 112(1)
+; ASM32PWR4-NEXT:    lis 3, -13108
+; ASM32PWR4-NEXT:    ori 3, 3, 52429
+; ASM32PWR4-NEXT:    stw 4, 120(1)
+; ASM32PWR4-NEXT:    lis 4, 16369
+; ASM32PWR4-NEXT:    ori 4, 4, 39321
+; ASM32PWR4-NEXT:    stw 3, 124(1)
+; ASM32PWR4-NEXT:    stw 4, 136(1)
+; ASM32PWR4-NEXT:    lis 4, 16371
+; ASM32PWR4-NEXT:    ori 4, 4, 13107
+; ASM32PWR4-NEXT:    stw 3, 156(1)
+; ASM32PWR4-NEXT:    lis 3, 16372
+; ASM32PWR4-NEXT:    ori 3, 3, 52428
+; ASM32PWR4-NEXT:    stw 4, 144(1)
 ; ASM32PWR4-NEXT:    lwz 4, L..C40(2) # %const.0
 ; ASM32PWR4-NEXT:    stw 3, 152(1)
 ; ASM32PWR4-NEXT:    lwz 3, L..C41(2) # %const.1
@@ -2016,37 +2030,25 @@ define void @caller_mix() {
 ; ASM32PWR4-NEXT:    lwz 4, L..C42(2) # %const.2
 ; ASM32PWR4-NEXT:    lfd 2, 0(3)
 ; ASM32PWR4-NEXT:    lwz 3, L..C43(2) # %const.3
-; ASM32PWR4-NEXT:    stw 5, 72(1)
-; ASM32PWR4-NEXT:    lis 5, 16345
-; ASM32PWR4-NEXT:    ori 5, 5, 39321
-; ASM32PWR4-NEXT:    stw 5, 80(1)
-; ASM32PWR4-NEXT:    lis 5, 16355
-; ASM32PWR4-NEXT:    ori 5, 5, 13107
 ; ASM32PWR4-NEXT:    lfd 3, 0(4)
 ; ASM32PWR4-NEXT:    lwz 4, L..C44(2) # %const.4
 ; ASM32PWR4-NEXT:    lfd 4, 0(3)
 ; ASM32PWR4-NEXT:    lwz 3, L..C45(2) # %const.5
-; ASM32PWR4-NEXT:    stw 5, 96(1)
-; ASM32PWR4-NEXT:    lis 5, 26214
-; ASM32PWR4-NEXT:    ori 7, 5, 26214
-; ASM32PWR4-NEXT:    lis 5, 16358
 ; ASM32PWR4-NEXT:    lfd 6, 0(4)
 ; ASM32PWR4-NEXT:    lwz 4, L..C46(2) # %const.6
-; ASM32PWR4-NEXT:    ori 5, 5, 26214
 ; ASM32PWR4-NEXT:    lfd 7, 0(3)
 ; ASM32PWR4-NEXT:    lwz 3, L..C47(2) # %const.7
-; ASM32PWR4-NEXT:    stw 5, 104(1)
-; ASM32PWR4-NEXT:    lis 5, 16361
+; ASM32PWR4-NEXT:    stw 5, 56(1)
+; ASM32PWR4-NEXT:    lis 5, 16329
 ; ASM32PWR4-NEXT:    ori 5, 5, 39321
 ; ASM32PWR4-NEXT:    lfd 8, 0(4)
 ; ASM32PWR4-NEXT:    lwz 4, L..C48(2) # %const.8
 ; ASM32PWR4-NEXT:    lfd 9, 0(3)
 ; ASM32PWR4-NEXT:    lwz 3, L..C49(2) # %const.9
-; ASM32PWR4-NEXT:    stw 5, 112(1)
-; ASM32PWR4-NEXT:    lis 5, -13108
-; ASM32PWR4-NEXT:    ori 5, 5, 52429
-; ASM32PWR4-NEXT:    stw 5, 124(1)
-; ASM32PWR4-NEXT:    stw 5, 156(1)
+; ASM32PWR4-NEXT:    stw 5, 64(1)
+; ASM32PWR4-NEXT:    lis 5, 16339
+; ASM32PWR4-NEXT:    ori 5, 5, 13107
+; ASM32PWR4-NEXT:    stw 5, 72(1)
 ; ASM32PWR4-NEXT:    lwz 5, L..C50(2) # %const.12
 ; ASM32PWR4-NEXT:    lfd 11, 0(4)
 ; ASM32PWR4-NEXT:    lwz 4, L..C51(2) # %const.10
@@ -2061,8 +2063,6 @@ define void @caller_mix() {
 ; ASM32PWR4-NEXT:    lfs 10, 0(5)
 ; ASM32PWR4-NEXT:    li 5, 3
 ; ASM32PWR4-NEXT:    stw 7, 108(1)
-; ASM32PWR4-NEXT:    stw 6, 120(1)
-; ASM32PWR4-NEXT:    li 6, 4
 ; ASM32PWR4-NEXT:    stw 7, 164(1)
 ; ASM32PWR4-NEXT:    li 7, 5
 ; ASM32PWR4-NEXT:    stw 11, 160(1)
diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
index c6d6f6a17b1b50e..5e7c3357b6d68ae 100644
--- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll
+++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll
@@ -65,90 +65,90 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    stwu 1, -64(1)
-; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    lbz 4, 115(1)
 ; PPC32-NEXT:    lbz 22, 119(1)
-; PPC32-NEXT:    lbz 21, 123(1)
 ; PPC32-NEXT:    add 4, 4, 5
+; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    add 5, 22, 6
-; PPC32-NEXT:    lbz 22, 131(1)
-; PPC32-NEXT:    add 6, 21, 7
-; PPC32-NEXT:    lbz 21, 135(1)
-; PPC32-NEXT:    addi 6, 6, 1
-; PPC32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 9, 22, 9
-; PPC32-NEXT:    lbz 20, 127(1)
-; PPC32-NEXT:    add 10, 21, 10
-; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    lbz 22, 135(1)
 ; PPC32-NEXT:    addi 5, 5, 1
+; PPC32-NEXT:    lbz 6, 131(1)
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 10, 22, 10
 ; PPC32-NEXT:    lbz 25, 83(1)
-; PPC32-NEXT:    add 7, 20, 8
-; PPC32-NEXT:    lbz 21, 147(1)
-; PPC32-NEXT:    addi 7, 7, 1
+; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    lbz 21, 123(1)
+; PPC32-NEXT:    addi 6, 6, 1
+; PPC32-NEXT:    lbz 22, 147(1)
 ; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    add 7, 21, 7
 ; PPC32-NEXT:    lbz 24, 79(1)
-; PPC32-NEXT:    add 25, 21, 25
-; PPC32-NEXT:    lbz 22, 143(1)
+; PPC32-NEXT:    add 25, 22, 25
+; PPC32-NEXT:    lbz 9, 143(1)
 ; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 23, 75(1)
-; PPC32-NEXT:    add 24, 22, 24
-; PPC32-NEXT:    lbz 8, 139(1)
 ; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 9, 9, 24
 ; PPC32-NEXT:    lbz 28, 95(1)
-; PPC32-NEXT:    add 8, 8, 23
-; PPC32-NEXT:    lbz 21, 159(1)
-; PPC32-NEXT:    addi 8, 8, 1
+; PPC32-NEXT:    addi 9, 9, 1
+; PPC32-NEXT:    lbz 23, 75(1)
+; PPC32-NEXT:    lbz 21, 139(1)
+; PPC32-NEXT:    lbz 22, 159(1)
 ; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 23, 21, 23
 ; PPC32-NEXT:    lbz 27, 91(1)
-; PPC32-NEXT:    add 28, 21, 28
-; PPC32-NEXT:    lbz 22, 155(1)
+; PPC32-NEXT:    add 28, 22, 28
+; PPC32-NEXT:    lbz 24, 155(1)
 ; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 26, 87(1)
-; PPC32-NEXT:    add 27, 22, 27
-; PPC32-NEXT:    lbz 23, 151(1)
 ; PPC32-NEXT:    lbz 11, 111(1)
-; PPC32-NEXT:    lbz 21, 175(1)
-; PPC32-NEXT:    add 26, 23, 26
+; PPC32-NEXT:    add 27, 24, 27
+; PPC32-NEXT:    lbz 26, 87(1)
+; PPC32-NEXT:    lbz 21, 151(1)
+; PPC32-NEXT:    lbz 22, 175(1)
 ; PPC32-NEXT:    lbz 12, 107(1)
+; PPC32-NEXT:    add 26, 21, 26
 ; PPC32-NEXT:    lbz 0, 171(1)
-; PPC32-NEXT:    add 11, 21, 11
+; PPC32-NEXT:    add 11, 22, 11
 ; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    addi 11, 11, 1
 ; PPC32-NEXT:    lbz 30, 103(1)
 ; PPC32-NEXT:    add 12, 0, 12
-; PPC32-NEXT:    lbz 22, 167(1)
+; PPC32-NEXT:    lbz 24, 167(1)
 ; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    lbz 29, 99(1)
-; PPC32-NEXT:    add 30, 22, 30
-; PPC32-NEXT:    lbz 23, 163(1)
-; PPC32-NEXT:    stb 11, 15(3)
-; PPC32-NEXT:    addi 11, 12, 1
-; PPC32-NEXT:    add 29, 23, 29
-; PPC32-NEXT:    stb 11, 14(3)
-; PPC32-NEXT:    addi 11, 30, 1
-; PPC32-NEXT:    stb 11, 13(3)
-; PPC32-NEXT:    addi 11, 29, 1
-; PPC32-NEXT:    stb 11, 12(3)
-; PPC32-NEXT:    addi 11, 28, 1
-; PPC32-NEXT:    stb 11, 11(3)
-; PPC32-NEXT:    addi 11, 27, 1
-; PPC32-NEXT:    stb 11, 10(3)
-; PPC32-NEXT:    addi 11, 26, 1
-; PPC32-NEXT:    stb 11, 9(3)
-; PPC32-NEXT:    addi 11, 25, 1
-; PPC32-NEXT:    stb 8, 6(3)
-; PPC32-NEXT:    addi 8, 10, 1
-; PPC32-NEXT:    stb 11, 8(3)
-; PPC32-NEXT:    addi 11, 24, 1
-; PPC32-NEXT:    stb 8, 5(3)
-; PPC32-NEXT:    addi 8, 9, 1
-; PPC32-NEXT:    stb 11, 7(3)
-; PPC32-NEXT:    stb 8, 4(3)
-; PPC32-NEXT:    stb 7, 3(3)
-; PPC32-NEXT:    stb 6, 2(3)
+; PPC32-NEXT:    add 30, 24, 30
+; PPC32-NEXT:    lbz 21, 163(1)
+; PPC32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stb 5, 1(3)
+; PPC32-NEXT:    addi 5, 11, 1
+; PPC32-NEXT:    stb 5, 15(3)
+; PPC32-NEXT:    addi 5, 12, 1
+; PPC32-NEXT:    lbz 20, 127(1)
+; PPC32-NEXT:    add 29, 21, 29
+; PPC32-NEXT:    stb 5, 14(3)
+; PPC32-NEXT:    addi 5, 30, 1
+; PPC32-NEXT:    stb 5, 13(3)
+; PPC32-NEXT:    addi 5, 29, 1
+; PPC32-NEXT:    stb 5, 12(3)
+; PPC32-NEXT:    addi 5, 28, 1
+; PPC32-NEXT:    stb 5, 11(3)
+; PPC32-NEXT:    addi 5, 27, 1
+; PPC32-NEXT:    add 8, 20, 8
+; PPC32-NEXT:    stb 9, 7(3)
+; PPC32-NEXT:    addi 9, 23, 1
+; PPC32-NEXT:    stb 6, 4(3)
+; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    stb 5, 10(3)
+; PPC32-NEXT:    addi 5, 26, 1
+; PPC32-NEXT:    stb 9, 6(3)
+; PPC32-NEXT:    addi 9, 10, 1
+; PPC32-NEXT:    stb 6, 3(3)
+; PPC32-NEXT:    addi 6, 7, 1
+; PPC32-NEXT:    stb 5, 9(3)
+; PPC32-NEXT:    addi 5, 25, 1
+; PPC32-NEXT:    stb 9, 5(3)
+; PPC32-NEXT:    stb 6, 2(3)
+; PPC32-NEXT:    stb 5, 8(3)
 ; PPC32-NEXT:    stb 4, 0(3)
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
@@ -184,70 +184,70 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AIX-PPC64-NEXT:    add 5, 23, 5
 ; AIX-PPC64-NEXT:    lbz 23, 223(1)
 ; AIX-PPC64-NEXT:    add 4, 24, 4
-; AIX-PPC64-NEXT:    lbz 24, 215(1)
-; AIX-PPC64-NEXT:    add 9, 22, 9
 ; AIX-PPC64-NEXT:    lbz 26, 127(1)
+; AIX-PPC64-NEXT:    add 9, 22, 9
+; AIX-PPC64-NEXT:    lbz 24, 215(1)
 ; AIX-PPC64-NEXT:    add 8, 23, 8
 ; AIX-PPC64-NEXT:    lbz 22, 255(1)
-; AIX-PPC64-NEXT:    add 7, 24, 7
+; AIX-PPC64-NEXT:    addi 5, 5, 1
 ; AIX-PPC64-NEXT:    lbz 25, 119(1)
-; AIX-PPC64-NEXT:    addi 9, 9, 1
+; AIX-PPC64-NEXT:    add 7, 24, 7
 ; AIX-PPC64-NEXT:    lbz 23, 247(1)
 ; AIX-PPC64-NEXT:    add 26, 22, 26
-; AIX-PPC64-NEXT:    lbz 24, 239(1)
-; AIX-PPC64-NEXT:    addi 8, 8, 1
 ; AIX-PPC64-NEXT:    lbz 29, 151(1)
+; AIX-PPC64-NEXT:    addi 9, 9, 1
+; AIX-PPC64-NEXT:    lbz 24, 239(1)
 ; AIX-PPC64-NEXT:    add 25, 23, 25
 ; AIX-PPC64-NEXT:    lbz 22, 279(1)
-; AIX-PPC64-NEXT:    add 10, 24, 10
+; AIX-PPC64-NEXT:    addi 8, 8, 1
 ; AIX-PPC64-NEXT:    lbz 28, 143(1)
-; AIX-PPC64-NEXT:    addi 10, 10, 1
+; AIX-PPC64-NEXT:    add 10, 24, 10
 ; AIX-PPC64-NEXT:    lbz 23, 271(1)
 ; AIX-PPC64-NEXT:    add 29, 22, 29
+; AIX-PPC64-NEXT:    lbz 11, 183(1)
+; AIX-PPC64-NEXT:    addi 10, 10, 1
 ; AIX-PPC64-NEXT:    lbz 27, 135(1)
-; AIX-PPC64-NEXT:    addi 7, 7, 1
-; AIX-PPC64-NEXT:    lbz 24, 263(1)
 ; AIX-PPC64-NEXT:    add 28, 23, 28
-; AIX-PPC64-NEXT:    lbz 11, 183(1)
-; AIX-PPC64-NEXT:    addi 6, 6, 1
+; AIX-PPC64-NEXT:    lbz 24, 263(1)
+; AIX-PPC64-NEXT:    addi 7, 7, 1
 ; AIX-PPC64-NEXT:    lbz 22, 311(1)
-; AIX-PPC64-NEXT:    add 27, 24, 27
+; AIX-PPC64-NEXT:    addi 6, 6, 1
 ; AIX-PPC64-NEXT:    lbz 12, 175(1)
-; AIX-PPC64-NEXT:    addi 5, 5, 1
+; AIX-PPC64-NEXT:    add 27, 24, 27
 ; AIX-PPC64-NEXT:    lbz 0, 303(1)
 ; AIX-PPC64-NEXT:    add 11, 22, 11
 ; AIX-PPC64-NEXT:    lbz 31, 167(1)
-; AIX-PPC64-NEXT:    addi 11, 11, 1
+; AIX-PPC64-NEXT:    addi 4, 4, 1
 ; AIX-PPC64-NEXT:    lbz 23, 295(1)
 ; AIX-PPC64-NEXT:    add 12, 0, 12
 ; AIX-PPC64-NEXT:    lbz 30, 159(1)
-; AIX-PPC64-NEXT:    addi 4, 4, 1
+; AIX-PPC64-NEXT:    addi 0, 25, 1
 ; AIX-PPC64-NEXT:    lbz 24, 287(1)
 ; AIX-PPC64-NEXT:    add 31, 23, 31
-; AIX-PPC64-NEXT:    stb 11, 15(3)
-; AIX-PPC64-NEXT:    addi 11, 12, 1
+; AIX-PPC64-NEXT:    stb 5, 1(3)
+; AIX-PPC64-NEXT:    addi 5, 11, 1
+; AIX-PPC64-NEXT:    stb 5, 15(3)
+; AIX-PPC64-NEXT:    addi 5, 12, 1
+; AIX-PPC64-NEXT:    stb 5, 14(3)
+; AIX-PPC64-NEXT:    addi 5, 31, 1
 ; AIX-PPC64-NEXT:    add 30, 24, 30
-; AIX-PPC64-NEXT:    stb 11, 14(3)
-; AIX-PPC64-NEXT:    addi 11, 31, 1
-; AIX-PPC64-NEXT:    stb 11, 13(3)
-; AIX-PPC64-NEXT:    addi 11, 30, 1
-; AIX-PPC64-NEXT:    stb 11, 12(3)
-; AIX-PPC64-NEXT:    addi 11, 29, 1
-; AIX-PPC64-NEXT:    stb 11, 11(3)
-; AIX-PPC64-NEXT:    addi 11, 28, 1
-; AIX-PPC64-NEXT:    stb 11, 10(3)
-; AIX-PPC64-NEXT:    addi 11, 27, 1
-; AIX-PPC64-NEXT:    stb 11, 9(3)
-; AIX-PPC64-NEXT:    addi 11, 26, 1
-; AIX-PPC64-NEXT:    stb 11, 8(3)
-; AIX-PPC64-NEXT:    addi 11, 25, 1
-; AIX-PPC64-NEXT:    stb 11, 7(3)
+; AIX-PPC64-NEXT:    stb 5, 13(3)
+; AIX-PPC64-NEXT:    addi 5, 30, 1
+; AIX-PPC64-NEXT:    stb 5, 12(3)
+; AIX-PPC64-NEXT:    addi 5, 29, 1
+; AIX-PPC64-NEXT:    stb 5, 11(3)
+; AIX-PPC64-NEXT:    addi 5, 28, 1
+; AIX-PPC64-NEXT:    stb 5, 10(3)
+; AIX-PPC64-NEXT:    addi 5, 27, 1
+; AIX-PPC64-NEXT:    stb 5, 9(3)
+; AIX-PPC64-NEXT:    addi 5, 26, 1
+; AIX-PPC64-NEXT:    stb 0, 7(3)
 ; AIX-PPC64-NEXT:    stb 10, 6(3)
 ; AIX-PPC64-NEXT:    stb 9, 5(3)
 ; AIX-PPC64-NEXT:    stb 8, 4(3)
 ; AIX-PPC64-NEXT:    stb 7, 3(3)
 ; AIX-PPC64-NEXT:    stb 6, 2(3)
-; AIX-PPC64-NEXT:    stb 5, 1(3)
+; AIX-PPC64-NEXT:    stb 5, 8(3)
 ; AIX-PPC64-NEXT:    stb 4, 0(3)
 ; AIX-PPC64-NEXT:    ld 31, -8(1) # 8-byte Folded Reload
 ; AIX-PPC64-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 7a6640fea2d1e42..b09ba87d23e153e 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -923,9 +923,9 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR7-NEXT:    stb r12, 176(r1)
 ; CHECK-PWR7-NEXT:    sub r0, r0, r30
 ; CHECK-PWR7-NEXT:    lbz r30, 314(r1)
-; CHECK-PWR7-NEXT:    stb r11, 160(r1)
-; CHECK-PWR7-NEXT:    sub r30, r30, r29
 ; CHECK-PWR7-NEXT:    stb r0, 192(r1)
+; CHECK-PWR7-NEXT:    sub r30, r30, r29
+; CHECK-PWR7-NEXT:    stb r11, 160(r1)
 ; CHECK-PWR7-NEXT:    stb r10, 144(r1)
 ; CHECK-PWR7-NEXT:    stb r9, 128(r1)
 ; CHECK-PWR7-NEXT:    stb r8, 112(r1)
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index f699ea54192d88f..582c2f21233ecb2 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -780,8 +780,8 @@ define <4 x i128> @sadd(<4 x i128> %a, <4 x i128> %b) local_unnamed_addr {
 ; CHECK-NEXT:    cmpd 7, 3, 4
 ; CHECK-NEXT:    mfvsrd 4, 42
 ; CHECK-NEXT:    sradi 3, 3, 63
-; CHECK-NEXT:    mtocrf 32, 12
 ; CHECK-NEXT:    crnor 21, 22, 21
+; CHECK-NEXT:    mtocrf 32, 12
 ; CHECK-NEXT:    crandc 23, 28, 26
 ; CHECK-NEXT:    crand 24, 26, 0
 ; CHECK-NEXT:    cmpld 4, 5
diff --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll
index 9cd2ec55108862d..c08107a009f2704 100644
--- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll
+++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll
@@ -64,90 +64,90 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    stwu 1, -64(1)
-; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    lbz 4, 115(1)
 ; PPC32-NEXT:    lbz 22, 119(1)
-; PPC32-NEXT:    lbz 21, 123(1)
 ; PPC32-NEXT:    add 4, 4, 5
+; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    add 5, 22, 6
-; PPC32-NEXT:    lbz 22, 131(1)
-; PPC32-NEXT:    add 6, 21, 7
-; PPC32-NEXT:    lbz 21, 135(1)
-; PPC32-NEXT:    addi 6, 6, 1
-; PPC32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 9, 22, 9
-; PPC32-NEXT:    lbz 20, 127(1)
-; PPC32-NEXT:    add 10, 21, 10
-; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    lbz 22, 135(1)
 ; PPC32-NEXT:    addi 5, 5, 1
+; PPC32-NEXT:    lbz 6, 131(1)
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 10, 22, 10
 ; PPC32-NEXT:    lbz 25, 83(1)
-; PPC32-NEXT:    add 7, 20, 8
-; PPC32-NEXT:    lbz 21, 147(1)
-; PPC32-NEXT:    addi 7, 7, 1
+; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    lbz 21, 123(1)
+; PPC32-NEXT:    addi 6, 6, 1
+; PPC32-NEXT:    lbz 22, 147(1)
 ; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    add 7, 21, 7
 ; PPC32-NEXT:    lbz 24, 79(1)
-; PPC32-NEXT:    add 25, 21, 25
-; PPC32-NEXT:    lbz 22, 143(1)
+; PPC32-NEXT:    add 25, 22, 25
+; PPC32-NEXT:    lbz 9, 143(1)
 ; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 23, 75(1)
-; PPC32-NEXT:    add 24, 22, 24
-; PPC32-NEXT:    lbz 8, 139(1)
 ; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 9, 9, 24
 ; PPC32-NEXT:    lbz 28, 95(1)
-; PPC32-NEXT:    add 8, 8, 23
-; PPC32-NEXT:    lbz 21, 159(1)
-; PPC32-NEXT:    addi 8, 8, 1
+; PPC32-NEXT:    addi 9, 9, 1
+; PPC32-NEXT:    lbz 23, 75(1)
+; PPC32-NEXT:    lbz 21, 139(1)
+; PPC32-NEXT:    lbz 22, 159(1)
 ; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 23, 21, 23
 ; PPC32-NEXT:    lbz 27, 91(1)
-; PPC32-NEXT:    add 28, 21, 28
-; PPC32-NEXT:    lbz 22, 155(1)
+; PPC32-NEXT:    add 28, 22, 28
+; PPC32-NEXT:    lbz 24, 155(1)
 ; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lbz 26, 87(1)
-; PPC32-NEXT:    add 27, 22, 27
-; PPC32-NEXT:    lbz 23, 151(1)
 ; PPC32-NEXT:    lbz 11, 111(1)
-; PPC32-NEXT:    lbz 21, 175(1)
-; PPC32-NEXT:    add 26, 23, 26
+; PPC32-NEXT:    add 27, 24, 27
+; PPC32-NEXT:    lbz 26, 87(1)
+; PPC32-NEXT:    lbz 21, 151(1)
+; PPC32-NEXT:    lbz 22, 175(1)
 ; PPC32-NEXT:    lbz 12, 107(1)
+; PPC32-NEXT:    add 26, 21, 26
 ; PPC32-NEXT:    lbz 0, 171(1)
-; PPC32-NEXT:    add 11, 21, 11
+; PPC32-NEXT:    add 11, 22, 11
 ; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    addi 11, 11, 1
 ; PPC32-NEXT:    lbz 30, 103(1)
 ; PPC32-NEXT:    add 12, 0, 12
-; PPC32-NEXT:    lbz 22, 167(1)
+; PPC32-NEXT:    lbz 24, 167(1)
 ; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    lbz 29, 99(1)
-; PPC32-NEXT:    add 30, 22, 30
-; PPC32-NEXT:    lbz 23, 163(1)
-; PPC32-NEXT:    stb 11, 15(3)
-; PPC32-NEXT:    addi 11, 12, 1
-; PPC32-NEXT:    add 29, 23, 29
-; PPC32-NEXT:    stb 11, 14(3)
-; PPC32-NEXT:    addi 11, 30, 1
-; PPC32-NEXT:    stb 11, 13(3)
-; PPC32-NEXT:    addi 11, 29, 1
-; PPC32-NEXT:    stb 11, 12(3)
-; PPC32-NEXT:    addi 11, 28, 1
-; PPC32-NEXT:    stb 11, 11(3)
-; PPC32-NEXT:    addi 11, 27, 1
-; PPC32-NEXT:    stb 11, 10(3)
-; PPC32-NEXT:    addi 11, 26, 1
-; PPC32-NEXT:    stb 11, 9(3)
-; PPC32-NEXT:    addi 11, 25, 1
-; PPC32-NEXT:    stb 8, 6(3)
-; PPC32-NEXT:    addi 8, 10, 1
-; PPC32-NEXT:    stb 11, 8(3)
-; PPC32-NEXT:    addi 11, 24, 1
-; PPC32-NEXT:    stb 8, 5(3)
-; PPC32-NEXT:    addi 8, 9, 1
-; PPC32-NEXT:    stb 11, 7(3)
-; PPC32-NEXT:    stb 8, 4(3)
-; PPC32-NEXT:    stb 7, 3(3)
-; PPC32-NEXT:    stb 6, 2(3)
+; PPC32-NEXT:    add 30, 24, 30
+; PPC32-NEXT:    lbz 21, 163(1)
+; PPC32-NEXT:    stw 20, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stb 5, 1(3)
+; PPC32-NEXT:    addi 5, 11, 1
+; PPC32-NEXT:    stb 5, 15(3)
+; PPC32-NEXT:    addi 5, 12, 1
+; PPC32-NEXT:    lbz 20, 127(1)
+; PPC32-NEXT:    add 29, 21, 29
+; PPC32-NEXT:    stb 5, 14(3)
+; PPC32-NEXT:    addi 5, 30, 1
+; PPC32-NEXT:    stb 5, 13(3)
+; PPC32-NEXT:    addi 5, 29, 1
+; PPC32-NEXT:    stb 5, 12(3)
+; PPC32-NEXT:    addi 5, 28, 1
+; PPC32-NEXT:    stb 5, 11(3)
+; PPC32-NEXT:    addi 5, 27, 1
+; PPC32-NEXT:    add 8, 20, 8
+; PPC32-NEXT:    stb 9, 7(3)
+; PPC32-NEXT:    addi 9, 23, 1
+; PPC32-NEXT:    stb 6, 4(3)
+; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    stb 5, 10(3)
+; PPC32-NEXT:    addi 5, 26, 1
+; PPC32-NEXT:    stb 9, 6(3)
+; PPC32-NEXT:    addi 9, 10, 1
+; PPC32-NEXT:    stb 6, 3(3)
+; PPC32-NEXT:    addi 6, 7, 1
+; PPC32-NEXT:    stb 5, 9(3)
+; PPC32-NEXT:    addi 5, 25, 1
+; PPC32-NEXT:    stb 9, 5(3)
+; PPC32-NEXT:    stb 6, 2(3)
+; PPC32-NEXT:    stb 5, 8(3)
 ; PPC32-NEXT:    stb 4, 0(3)
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
@@ -183,70 +183,70 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC64BE-NEXT:    add 5, 22, 5
 ; PPC64BE-NEXT:    lbz 22, 223(1)
 ; PPC64BE-NEXT:    add 4, 23, 4
-; PPC64BE-NEXT:    lbz 23, 215(1)
-; PPC64BE-NEXT:    add 9, 21, 9
 ; PPC64BE-NEXT:    lbz 25, 127(1)
+; PPC64BE-NEXT:    add 9, 21, 9
+; PPC64BE-NEXT:    lbz 23, 215(1)
 ; PPC64BE-NEXT:    add 8, 22, 8
 ; PPC64BE-NEXT:    lbz 21, 255(1)
-; PPC64BE-NEXT:    add 7, 23, 7
+; PPC64BE-NEXT:    addi 5, 5, 1
 ; PPC64BE-NEXT:    lbz 24, 119(1)
-; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    add 7, 23, 7
 ; PPC64BE-NEXT:    lbz 22, 247(1)
 ; PPC64BE-NEXT:    add 25, 21, 25
-; PPC64BE-NEXT:    lbz 23, 239(1)
-; PPC64BE-NEXT:    addi 8, 8, 1
 ; PPC64BE-NEXT:    lbz 28, 151(1)
+; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    lbz 23, 239(1)
 ; PPC64BE-NEXT:    add 24, 22, 24
 ; PPC64BE-NEXT:    lbz 21, 279(1)
-; PPC64BE-NEXT:    add 10, 23, 10
+; PPC64BE-NEXT:    addi 8, 8, 1
 ; PPC64BE-NEXT:    lbz 27, 143(1)
-; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    add 10, 23, 10
 ; PPC64BE-NEXT:    lbz 22, 271(1)
 ; PPC64BE-NEXT:    add 28, 21, 28
+; PPC64BE-NEXT:    lbz 11, 183(1)
+; PPC64BE-NEXT:    addi 10, 10, 1
 ; PPC64BE-NEXT:    lbz 26, 135(1)
-; PPC64BE-NEXT:    addi 7, 7, 1
-; PPC64BE-NEXT:    lbz 23, 263(1)
 ; PPC64BE-NEXT:    add 27, 22, 27
-; PPC64BE-NEXT:    lbz 11, 183(1)
-; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    lbz 23, 263(1)
+; PPC64BE-NEXT:    addi 7, 7, 1
 ; PPC64BE-NEXT:    lbz 21, 311(1)
-; PPC64BE-NEXT:    add 26, 23, 26
+; PPC64BE-NEXT:    addi 6, 6, 1
 ; PPC64BE-NEXT:    lbz 12, 175(1)
-; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    add 26, 23, 26
 ; PPC64BE-NEXT:    lbz 0, 303(1)
 ; PPC64BE-NEXT:    add 11, 21, 11
 ; PPC64BE-NEXT:    lbz 30, 167(1)
-; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    lbz 22, 295(1)
 ; PPC64BE-NEXT:    add 12, 0, 12
 ; PPC64BE-NEXT:    lbz 29, 159(1)
-; PPC64BE-NEXT:    addi 4, 4, 1
+; PPC64BE-NEXT:    addi 0, 24, 1
 ; PPC64BE-NEXT:    lbz 23, 287(1)
 ; PPC64BE-NEXT:    add 30, 22, 30
-; PPC64BE-NEXT:    stb 11, 15(3)
-; PPC64BE-NEXT:    addi 11, 12, 1
+; PPC64BE-NEXT:    stb 5, 1(3)
+; PPC64BE-NEXT:    addi 5, 11, 1
+; PPC64BE-NEXT:    stb 5, 15(3)
+; PPC64BE-NEXT:    addi 5, 12, 1
+; PPC64BE-NEXT:    stb 5, 14(3)
+; PPC64BE-NEXT:    addi 5, 30, 1
 ; PPC64BE-NEXT:    add 29, 23, 29
-; PPC64BE-NEXT:    stb 11, 14(3)
-; PPC64BE-NEXT:    addi 11, 30, 1
-; PPC64BE-NEXT:    stb 11, 13(3)
-; PPC64BE-NEXT:    addi 11, 29, 1
-; PPC64BE-NEXT:    stb 11, 12(3)
-; PPC64BE-NEXT:    addi 11, 28, 1
-; PPC64BE-NEXT:    stb 11, 11(3)
-; PPC64BE-NEXT:    addi 11, 27, 1
-; PPC64BE-NEXT:    stb 11, 10(3)
-; PPC64BE-NEXT:    addi 11, 26, 1
-; PPC64BE-NEXT:    stb 11, 9(3)
-; PPC64BE-NEXT:    addi 11, 25, 1
-; PPC64BE-NEXT:    stb 11, 8(3)
-; PPC64BE-NEXT:    addi 11, 24, 1
-; PPC64BE-NEXT:    stb 11, 7(3)
+; PPC64BE-NEXT:    stb 5, 13(3)
+; PPC64BE-NEXT:    addi 5, 29, 1
+; PPC64BE-NEXT:    stb 5, 12(3)
+; PPC64BE-NEXT:    addi 5, 28, 1
+; PPC64BE-NEXT:    stb 5, 11(3)
+; PPC64BE-NEXT:    addi 5, 27, 1
+; PPC64BE-NEXT:    stb 5, 10(3)
+; PPC64BE-NEXT:    addi 5, 26, 1
+; PPC64BE-NEXT:    stb 5, 9(3)
+; PPC64BE-NEXT:    addi 5, 25, 1
+; PPC64BE-NEXT:    stb 0, 7(3)
 ; PPC64BE-NEXT:    stb 10, 6(3)
 ; PPC64BE-NEXT:    stb 9, 5(3)
 ; PPC64BE-NEXT:    stb 8, 4(3)
 ; PPC64BE-NEXT:    stb 7, 3(3)
 ; PPC64BE-NEXT:    stb 6, 2(3)
-; PPC64BE-NEXT:    stb 5, 1(3)
+; PPC64BE-NEXT:    stb 5, 8(3)
 ; PPC64BE-NEXT:    stb 4, 0(3)
 ; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/testBitReverse.ll b/llvm/test/CodeGen/PowerPC/testBitReverse.ll
index b3d5a5273717a10..cf39cbc1e2e5f62 100644
--- a/llvm/test/CodeGen/PowerPC/testBitReverse.ll
+++ b/llvm/test/CodeGen/PowerPC/testBitReverse.ll
@@ -82,52 +82,52 @@ define i64 @testBitReverseIntrinsicI64(i64 %arg) {
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    lis 5, -21846
 ; PPC32-NEXT:    lis 6, 21845
+; PPC32-NEXT:    slwi 10, 3, 1
 ; PPC32-NEXT:    ori 5, 5, 43690
-; PPC32-NEXT:    slwi 10, 4, 1
-; PPC32-NEXT:    slwi 11, 3, 1
+; PPC32-NEXT:    slwi 11, 4, 1
+; PPC32-NEXT:    srwi 3, 3, 1
 ; PPC32-NEXT:    ori 6, 6, 21845
 ; PPC32-NEXT:    srwi 4, 4, 1
-; PPC32-NEXT:    srwi 3, 3, 1
 ; PPC32-NEXT:    and 10, 10, 5
 ; PPC32-NEXT:    and 5, 11, 5
-; PPC32-NEXT:    and 4, 4, 6
 ; PPC32-NEXT:    and 3, 3, 6
+; PPC32-NEXT:    and 4, 4, 6
 ; PPC32-NEXT:    lis 7, -13108
 ; PPC32-NEXT:    lis 8, 13107
-; PPC32-NEXT:    or 4, 4, 10
-; PPC32-NEXT:    or 3, 3, 5
+; PPC32-NEXT:    or 3, 3, 10
+; PPC32-NEXT:    or 4, 4, 5
 ; PPC32-NEXT:    ori 7, 7, 52428
 ; PPC32-NEXT:    ori 8, 8, 13107
-; PPC32-NEXT:    slwi 5, 4, 2
-; PPC32-NEXT:    srwi 4, 4, 2
-; PPC32-NEXT:    slwi 6, 3, 2
+; PPC32-NEXT:    slwi 5, 3, 2
 ; PPC32-NEXT:    srwi 3, 3, 2
+; PPC32-NEXT:    slwi 6, 4, 2
+; PPC32-NEXT:    srwi 4, 4, 2
 ; PPC32-NEXT:    and 5, 5, 7
-; PPC32-NEXT:    and 4, 4, 8
-; PPC32-NEXT:    and 6, 6, 7
 ; PPC32-NEXT:    and 3, 3, 8
+; PPC32-NEXT:    and 6, 6, 7
+; PPC32-NEXT:    and 4, 4, 8
 ; PPC32-NEXT:    lis 9, -3856
 ; PPC32-NEXT:    lis 11, 3855
-; PPC32-NEXT:    or 4, 4, 5
-; PPC32-NEXT:    or 3, 3, 6
+; PPC32-NEXT:    or 3, 3, 5
+; PPC32-NEXT:    or 4, 4, 6
 ; PPC32-NEXT:    ori 9, 9, 61680
 ; PPC32-NEXT:    ori 11, 11, 3855
-; PPC32-NEXT:    slwi 5, 4, 4
-; PPC32-NEXT:    srwi 4, 4, 4
-; PPC32-NEXT:    slwi 6, 3, 4
+; PPC32-NEXT:    slwi 5, 3, 4
 ; PPC32-NEXT:    srwi 3, 3, 4
+; PPC32-NEXT:    slwi 6, 4, 4
+; PPC32-NEXT:    srwi 4, 4, 4
 ; PPC32-NEXT:    and 5, 5, 9
-; PPC32-NEXT:    and 4, 4, 11
-; PPC32-NEXT:    and 6, 6, 9
 ; PPC32-NEXT:    and 3, 3, 11
-; PPC32-NEXT:    or 5, 4, 5
-; PPC32-NEXT:    or 6, 3, 6
-; PPC32-NEXT:    rotlwi 3, 5, 24
-; PPC32-NEXT:    rotlwi 4, 6, 24
-; PPC32-NEXT:    rlwimi 3, 5, 8, 8, 15
-; PPC32-NEXT:    rlwimi 4, 6, 8, 8, 15
-; PPC32-NEXT:    rlwimi 3, 5, 8, 24, 31
-; PPC32-NEXT:    rlwimi 4, 6, 8, 24, 31
+; PPC32-NEXT:    and 6, 6, 9
+; PPC32-NEXT:    and 4, 4, 11
+; PPC32-NEXT:    or 5, 3, 5
+; PPC32-NEXT:    or 6, 4, 6
+; PPC32-NEXT:    rotlwi 4, 5, 24
+; PPC32-NEXT:    rotlwi 3, 6, 24
+; PPC32-NEXT:    rlwimi 4, 5, 8, 8, 15
+; PPC32-NEXT:    rlwimi 3, 6, 8, 8, 15
+; PPC32-NEXT:    rlwimi 3, 6, 8, 24, 31
+; PPC32-NEXT:    rlwimi 4, 5, 8, 24, 31
 ; PPC32-NEXT:    blr
 ;
 ; CHECK-LABEL: testBitReverseIntrinsicI64:
diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
index 48098e3a277c184..4986b5c209e654b 100644
--- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
@@ -5,106 +5,111 @@
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC64-LABEL: muloti_test:
 ; PPC64:       # %bb.0: # %start
-; PPC64-NEXT:    addic 8, 5, -1
+; PPC64-NEXT:    addic 10, 5, -1
+; PPC64-NEXT:    mulld 7, 4, 6
+; PPC64-NEXT:    mulhdu 8, 3, 6
+; PPC64-NEXT:    subfe 10, 10, 5
 ; PPC64-NEXT:    mulhdu 9, 5, 4
-; PPC64-NEXT:    mulld 10, 5, 4
-; PPC64-NEXT:    subfe 5, 8, 5
-; PPC64-NEXT:    mulld 8, 3, 6
-; PPC64-NEXT:    add 8, 8, 10
-; PPC64-NEXT:    addic 10, 3, -1
-; PPC64-NEXT:    mulhdu 7, 3, 6
-; PPC64-NEXT:    subfe 3, 10, 3
-; PPC64-NEXT:    and 5, 3, 5
-; PPC64-NEXT:    addic 3, 7, -1
-; PPC64-NEXT:    subfe 7, 3, 7
-; PPC64-NEXT:    or 5, 5, 7
-; PPC64-NEXT:    mulhdu 10, 4, 6
-; PPC64-NEXT:    addic 7, 9, -1
-; PPC64-NEXT:    add 3, 10, 8
-; PPC64-NEXT:    subfe 7, 7, 9
-; PPC64-NEXT:    or 5, 5, 7
-; PPC64-NEXT:    subc 7, 3, 10
-; PPC64-NEXT:    subfe 7, 3, 3
-; PPC64-NEXT:    neg 7, 7
-; PPC64-NEXT:    or 5, 5, 7
-; PPC64-NEXT:    mulld 4, 4, 6
+; PPC64-NEXT:    mulld 5, 5, 4
+; PPC64-NEXT:    mulhdu 4, 4, 6
+; PPC64-NEXT:    mulld 6, 3, 6
+; PPC64-NEXT:    add 5, 6, 5
+; PPC64-NEXT:    addic 6, 3, -1
+; PPC64-NEXT:    subfe 6, 6, 3
+; PPC64-NEXT:    add 3, 4, 5
+; PPC64-NEXT:    and 5, 6, 10
+; PPC64-NEXT:    addic 6, 8, -1
+; PPC64-NEXT:    subfe 6, 6, 8
+; PPC64-NEXT:    or 5, 5, 6
+; PPC64-NEXT:    addic 6, 9, -1
+; PPC64-NEXT:    subfe 6, 6, 9
+; PPC64-NEXT:    subc 4, 3, 4
+; PPC64-NEXT:    subfe 4, 3, 3
+; PPC64-NEXT:    or 5, 5, 6
+; PPC64-NEXT:    neg 4, 4
+; PPC64-NEXT:    or 5, 5, 4
+; PPC64-NEXT:    mr 4, 7
 ; PPC64-NEXT:    blr
 ;
 ; PPC32-LABEL: muloti_test:
 ; PPC32:       # %bb.0: # %start
 ; PPC32-NEXT:    stwu 1, -64(1)
-; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mulhwu. 26, 7, 6
+; PPC32-NEXT:    mulhwu. 12, 7, 6
 ; PPC32-NEXT:    mcrf 1, 0
-; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mfcr 12
 ; PPC32-NEXT:    cmpwi 7, 5, 0
-; PPC32-NEXT:    cmpwi 2, 7, 0
-; PPC32-NEXT:    mulhwu. 26, 5, 8
-; PPC32-NEXT:    mcrf 5, 0
-; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    crnor 20, 30, 10
 ; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    cmpwi 7, 9, 0
-; PPC32-NEXT:    mulhwu. 26, 3, 10
+; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mulhwu. 0, 5, 8
+; PPC32-NEXT:    mcrf 5, 0
+; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mulhwu. 0, 3, 10
 ; PPC32-NEXT:    mcrf 6, 0
+; PPC32-NEXT:    cmpwi 7, 0
+; PPC32-NEXT:    crnor 20, 30, 2
+; PPC32-NEXT:    cmpwi 3, 0
+; PPC32-NEXT:    cmpwi 7, 9, 0
+; PPC32-NEXT:    mulhwu 30, 6, 10
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    crnor 21, 30, 2
 ; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT:    cmpwi 2, 3, 0
-; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    crnor 21, 30, 10
-; PPC32-NEXT:    mulhwu. 26, 9, 4
-; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    crorc 20, 20, 6
-; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    crorc 21, 21, 26
-; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mulhwu 30, 6, 10
-; PPC32-NEXT:    stw 12, 20(1)
-; PPC32-NEXT:    crorc 20, 20, 22
-; PPC32-NEXT:    crorc 21, 21, 2
-; PPC32-NEXT:    li 11, 0
+; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    mullw 26, 5, 10
 ; PPC32-NEXT:    addc 30, 26, 30
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    crorc 20, 20, 6
+; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mullw 24, 5, 8
+; PPC32-NEXT:    crorc 20, 20, 22
+; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mullw 23, 7, 6
+; PPC32-NEXT:    add 24, 23, 24
 ; PPC32-NEXT:    mulhwu 29, 5, 10
 ; PPC32-NEXT:    addze 29, 29
-; PPC32-NEXT:    mullw 23, 5, 8
-; PPC32-NEXT:    mullw 22, 7, 6
-; PPC32-NEXT:    mulhwu 0, 6, 9
-; PPC32-NEXT:    mulhwu 12, 5, 9
-; PPC32-NEXT:    mulhwu 27, 8, 6
-; PPC32-NEXT:    mullw 25, 6, 9
-; PPC32-NEXT:    mullw 24, 5, 9
-; PPC32-NEXT:    mullw 5, 9, 4
-; PPC32-NEXT:    add 9, 22, 23
-; PPC32-NEXT:    add 9, 27, 9
-; PPC32-NEXT:    cmplw 1, 9, 27
+; PPC32-NEXT:    mulhwu 28, 8, 6
+; PPC32-NEXT:    add 24, 28, 24
+; PPC32-NEXT:    cmplw 1, 24, 28
 ; PPC32-NEXT:    cror 20, 20, 4
-; PPC32-NEXT:    mullw 23, 3, 10
-; PPC32-NEXT:    add 26, 23, 5
-; PPC32-NEXT:    addc 5, 25, 30
-; PPC32-NEXT:    addze 0, 0
+; PPC32-NEXT:    mullw 22, 9, 4
+; PPC32-NEXT:    mullw 21, 3, 10
+; PPC32-NEXT:    add 28, 21, 22
+; PPC32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
+; PPC32-NEXT:    mulhwu. 23, 9, 4
+; PPC32-NEXT:    crorc 21, 21, 2
 ; PPC32-NEXT:    or. 3, 4, 3
-; PPC32-NEXT:    mulhwu 28, 4, 10
 ; PPC32-NEXT:    mcrf 1, 0
-; PPC32-NEXT:    addc 3, 29, 0
-; PPC32-NEXT:    add 26, 28, 26
-; PPC32-NEXT:    cmplw 6, 26, 28
+; PPC32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
+; PPC32-NEXT:    mulhwu 12, 5, 9
+; PPC32-NEXT:    mullw 25, 6, 9
+; PPC32-NEXT:    mullw 26, 5, 9
+; PPC32-NEXT:    addc 5, 25, 30
+; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
+; PPC32-NEXT:    mulhwu 9, 6, 9
+; PPC32-NEXT:    addze 3, 9
+; PPC32-NEXT:    addc 3, 29, 3
+; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
+; PPC32-NEXT:    mulhwu 11, 4, 10
+; PPC32-NEXT:    add 28, 11, 28
+; PPC32-NEXT:    cmplw 6, 28, 11
+; PPC32-NEXT:    li 11, 0
 ; PPC32-NEXT:    cror 21, 21, 24
-; PPC32-NEXT:    mullw 30, 4, 10
+; PPC32-NEXT:    mullw 27, 4, 10
 ; PPC32-NEXT:    or. 4, 8, 7
 ; PPC32-NEXT:    addze 4, 11
-; PPC32-NEXT:    addc 7, 24, 3
+; PPC32-NEXT:    addc 7, 26, 3
 ; PPC32-NEXT:    crnor 22, 2, 6
-; PPC32-NEXT:    mullw 27, 8, 6
+; PPC32-NEXT:    mullw 0, 8, 6
 ; PPC32-NEXT:    adde 8, 12, 4
-; PPC32-NEXT:    addc 3, 30, 27
-; PPC32-NEXT:    adde 9, 26, 9
+; PPC32-NEXT:    addc 3, 27, 0
+; PPC32-NEXT:    adde 9, 28, 24
 ; PPC32-NEXT:    addc 4, 7, 3
 ; PPC32-NEXT:    adde 3, 8, 9
 ; PPC32-NEXT:    cror 21, 22, 21
 ; PPC32-NEXT:    cmplw 4, 7
 ; PPC32-NEXT:    cmplw 1, 3, 8
-; PPC32-NEXT:    lwz 12, 20(1)
 ; PPC32-NEXT:    cror 20, 21, 20
 ; PPC32-NEXT:    crandc 21, 4, 6
 ; PPC32-NEXT:    crand 22, 6, 0
@@ -117,16 +122,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; PPC32-NEXT:  .LBB0_1: # %start
 ; PPC32-NEXT:    li 7, 0
 ; PPC32-NEXT:  .LBB0_2: # %start
-; PPC32-NEXT:    mtcrf 32, 12 # cr2
-; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    addi 1, 1, 64
 ; PPC32-NEXT:    blr
 start:
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index ff7f1fc9029813d..6d510e0a7ffda33 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -12088,38 +12088,38 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 2
 ; PWR5-NEXT:    subfic 3, 3, 2
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -12133,38 +12133,38 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 2
 ; PWR6-NEXT:    subfic 3, 3, 2
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -12222,39 +12222,39 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 3
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -12267,39 +12267,39 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 3
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -12356,38 +12356,38 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 3
 ; PWR5-NEXT:    subfic 3, 3, 3
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -12401,38 +12401,38 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 3
 ; PWR6-NEXT:    subfic 3, 3, 3
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -12490,39 +12490,39 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 4
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -12535,39 +12535,39 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 4
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -12624,38 +12624,38 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 4
 ; PWR5-NEXT:    subfic 3, 3, 4
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -12669,38 +12669,38 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 4
 ; PWR6-NEXT:    subfic 3, 3, 4
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -12758,39 +12758,39 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 5
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -12803,39 +12803,39 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 5
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -12892,38 +12892,38 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 5
 ; PWR5-NEXT:    subfic 3, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -12937,38 +12937,38 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 5
 ; PWR6-NEXT:    subfic 3, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -13026,39 +13026,39 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 6
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -13071,39 +13071,39 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 6
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -13160,38 +13160,38 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 6
 ; PWR5-NEXT:    subfic 3, 3, 6
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -13205,38 +13205,38 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 6
 ; PWR6-NEXT:    subfic 3, 3, 6
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -13294,39 +13294,39 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 7
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -13339,39 +13339,39 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 7
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -13428,38 +13428,38 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 7
 ; PWR5-NEXT:    subfic 3, 3, 7
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -13473,38 +13473,38 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 7
 ; PWR6-NEXT:    subfic 3, 3, 7
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -13562,39 +13562,39 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 8
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -13607,39 +13607,39 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 8
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -13696,38 +13696,38 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 8
 ; PWR5-NEXT:    subfic 3, 3, 8
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -13741,38 +13741,38 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 8
 ; PWR6-NEXT:    subfic 3, 3, 8
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -13830,39 +13830,39 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 9
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -13875,39 +13875,39 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 9
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -13964,38 +13964,38 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 9
 ; PWR5-NEXT:    subfic 3, 3, 9
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -14009,38 +14009,38 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 9
 ; PWR6-NEXT:    subfic 3, 3, 9
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -14098,39 +14098,39 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 10
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -14143,39 +14143,39 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 10
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -14232,38 +14232,38 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 10
 ; PWR5-NEXT:    subfic 3, 3, 10
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -14277,38 +14277,38 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 10
 ; PWR6-NEXT:    subfic 3, 3, 10
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -14366,39 +14366,39 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 11
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -14411,39 +14411,39 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 11
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -14500,38 +14500,38 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 11
 ; PWR5-NEXT:    subfic 3, 3, 11
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -14545,38 +14545,38 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 11
 ; PWR6-NEXT:    subfic 3, 3, 11
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -14634,39 +14634,39 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 12
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -14679,39 +14679,39 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 12
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -14768,38 +14768,38 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 12
 ; PWR5-NEXT:    subfic 3, 3, 12
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -14813,38 +14813,38 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 12
 ; PWR6-NEXT:    subfic 3, 3, 12
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -14902,39 +14902,39 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 13
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -14947,39 +14947,39 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 13
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -15036,38 +15036,38 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 13
 ; PWR5-NEXT:    subfic 3, 3, 13
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -15081,38 +15081,38 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 13
 ; PWR6-NEXT:    subfic 3, 3, 13
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -15170,39 +15170,39 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 14
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -15215,39 +15215,39 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 14
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -15304,38 +15304,38 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 14
 ; PWR5-NEXT:    subfic 3, 3, 14
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -15349,38 +15349,38 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 14
 ; PWR6-NEXT:    subfic 3, 3, 14
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -15438,39 +15438,39 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 15
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -15483,39 +15483,39 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 15
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -15572,38 +15572,38 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 15
 ; PWR5-NEXT:    subfic 3, 3, 15
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -15617,38 +15617,38 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 15
 ; PWR6-NEXT:    subfic 3, 3, 15
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -15706,39 +15706,39 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 16
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -15751,39 +15751,39 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 16
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -15840,38 +15840,38 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 16
 ; PWR5-NEXT:    subfic 3, 3, 16
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -15885,38 +15885,38 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 16
 ; PWR6-NEXT:    subfic 3, 3, 16
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -15974,39 +15974,39 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 17
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -16019,39 +16019,39 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 17
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -16108,38 +16108,38 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 17
 ; PWR5-NEXT:    subfic 3, 3, 17
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -16153,38 +16153,38 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 17
 ; PWR6-NEXT:    subfic 3, 3, 17
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -16242,39 +16242,39 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 18
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -16287,39 +16287,39 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 18
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -16376,38 +16376,38 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 18
 ; PWR5-NEXT:    subfic 3, 3, 18
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -16421,38 +16421,38 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 18
 ; PWR6-NEXT:    subfic 3, 3, 18
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -16510,39 +16510,39 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 19
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -16555,39 +16555,39 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 19
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -16644,38 +16644,38 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 19
 ; PWR5-NEXT:    subfic 3, 3, 19
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -16689,38 +16689,38 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 19
 ; PWR6-NEXT:    subfic 3, 3, 19
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -16778,39 +16778,39 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 20
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -16823,39 +16823,39 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 20
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -16912,38 +16912,38 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 20
 ; PWR5-NEXT:    subfic 3, 3, 20
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -16957,38 +16957,38 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 20
 ; PWR6-NEXT:    subfic 3, 3, 20
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -17046,39 +17046,39 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 21
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -17091,39 +17091,39 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 21
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -17180,38 +17180,38 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 21
 ; PWR5-NEXT:    subfic 3, 3, 21
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -17225,38 +17225,38 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 21
 ; PWR6-NEXT:    subfic 3, 3, 21
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -17314,39 +17314,39 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 22
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -17359,39 +17359,39 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 22
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -17448,38 +17448,38 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 22
 ; PWR5-NEXT:    subfic 3, 3, 22
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -17493,38 +17493,38 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 22
 ; PWR6-NEXT:    subfic 3, 3, 22
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -17582,39 +17582,39 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 23
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -17627,39 +17627,39 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 23
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -17716,38 +17716,38 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 23
 ; PWR5-NEXT:    subfic 3, 3, 23
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -17761,38 +17761,38 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 23
 ; PWR6-NEXT:    subfic 3, 3, 23
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -17850,39 +17850,39 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 24
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -17895,39 +17895,39 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 24
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -17984,38 +17984,38 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 24
 ; PWR5-NEXT:    subfic 3, 3, 24
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -18029,38 +18029,38 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 24
 ; PWR6-NEXT:    subfic 3, 3, 24
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -18118,39 +18118,39 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 25
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -18163,39 +18163,39 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 25
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -18252,38 +18252,38 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 25
 ; PWR5-NEXT:    subfic 3, 3, 25
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -18297,38 +18297,38 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 25
 ; PWR6-NEXT:    subfic 3, 3, 25
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -18386,39 +18386,39 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 26
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -18431,39 +18431,39 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 26
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -18520,38 +18520,38 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 26
 ; PWR5-NEXT:    subfic 3, 3, 26
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -18565,38 +18565,38 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 26
 ; PWR6-NEXT:    subfic 3, 3, 26
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -18654,39 +18654,39 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 27
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -18699,39 +18699,39 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 27
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -18788,38 +18788,38 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 27
 ; PWR5-NEXT:    subfic 3, 3, 27
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -18833,38 +18833,38 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 27
 ; PWR6-NEXT:    subfic 3, 3, 27
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -18922,39 +18922,39 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 28
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -18967,39 +18967,39 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 28
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -19056,38 +19056,38 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 28
 ; PWR5-NEXT:    subfic 3, 3, 28
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -19101,38 +19101,38 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 28
 ; PWR6-NEXT:    subfic 3, 3, 28
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -19190,39 +19190,39 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 29
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -19235,39 +19235,39 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 29
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -19324,38 +19324,38 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 29
 ; PWR5-NEXT:    subfic 3, 3, 29
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -19369,38 +19369,38 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 29
 ; PWR6-NEXT:    subfic 3, 3, 29
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -19458,39 +19458,39 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 30
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -19503,39 +19503,39 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 30
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -19592,38 +19592,38 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 30
 ; PWR5-NEXT:    subfic 3, 3, 30
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -19637,38 +19637,38 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 30
 ; PWR6-NEXT:    subfic 3, 3, 30
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -19726,39 +19726,39 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 31
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -19771,39 +19771,39 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 31
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -19860,38 +19860,38 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 31
 ; PWR5-NEXT:    subfic 3, 3, 31
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -19905,38 +19905,38 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 31
 ; PWR6-NEXT:    subfic 3, 3, 31
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -19994,39 +19994,39 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 32
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -20039,39 +20039,39 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 32
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -20128,38 +20128,38 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 32
 ; PWR5-NEXT:    subfic 3, 3, 32
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -20173,38 +20173,38 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 32
 ; PWR6-NEXT:    subfic 3, 3, 32
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -20262,39 +20262,39 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 33
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -20307,39 +20307,39 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 33
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -20396,38 +20396,38 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 33
 ; PWR5-NEXT:    subfic 3, 3, 33
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -20441,38 +20441,38 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 33
 ; PWR6-NEXT:    subfic 3, 3, 33
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -20530,39 +20530,39 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 34
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -20575,39 +20575,39 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 34
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -20664,38 +20664,38 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 34
 ; PWR5-NEXT:    subfic 3, 3, 34
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -20709,38 +20709,38 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 34
 ; PWR6-NEXT:    subfic 3, 3, 34
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -20798,39 +20798,39 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 35
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -20843,39 +20843,39 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 35
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -20932,38 +20932,38 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 35
 ; PWR5-NEXT:    subfic 3, 3, 35
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -20977,38 +20977,38 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 35
 ; PWR6-NEXT:    subfic 3, 3, 35
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -21066,39 +21066,39 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 36
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -21111,39 +21111,39 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 36
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -21200,38 +21200,38 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 36
 ; PWR5-NEXT:    subfic 3, 3, 36
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -21245,38 +21245,38 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 36
 ; PWR6-NEXT:    subfic 3, 3, 36
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -21334,39 +21334,39 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 37
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -21379,39 +21379,39 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 37
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -21468,38 +21468,38 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 37
 ; PWR5-NEXT:    subfic 3, 3, 37
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -21513,38 +21513,38 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 37
 ; PWR6-NEXT:    subfic 3, 3, 37
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -21602,39 +21602,39 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 38
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -21647,39 +21647,39 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 38
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -21736,38 +21736,38 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 38
 ; PWR5-NEXT:    subfic 3, 3, 38
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -21781,38 +21781,38 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 38
 ; PWR6-NEXT:    subfic 3, 3, 38
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -21870,39 +21870,39 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 39
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -21915,39 +21915,39 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 39
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -22004,38 +22004,38 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 39
 ; PWR5-NEXT:    subfic 3, 3, 39
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -22049,38 +22049,38 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 39
 ; PWR6-NEXT:    subfic 3, 3, 39
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -22138,39 +22138,39 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 40
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -22183,39 +22183,39 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 40
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -22272,38 +22272,38 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 40
 ; PWR5-NEXT:    subfic 3, 3, 40
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -22317,38 +22317,38 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 40
 ; PWR6-NEXT:    subfic 3, 3, 40
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -22406,39 +22406,39 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 41
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -22451,39 +22451,39 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 41
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -22540,38 +22540,38 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 41
 ; PWR5-NEXT:    subfic 3, 3, 41
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -22585,38 +22585,38 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 41
 ; PWR6-NEXT:    subfic 3, 3, 41
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -22674,39 +22674,39 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 42
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -22719,39 +22719,39 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 42
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -22808,38 +22808,38 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 42
 ; PWR5-NEXT:    subfic 3, 3, 42
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -22853,38 +22853,38 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 42
 ; PWR6-NEXT:    subfic 3, 3, 42
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -22942,39 +22942,39 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 43
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -22987,39 +22987,39 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 43
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -23076,38 +23076,38 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 43
 ; PWR5-NEXT:    subfic 3, 3, 43
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -23121,38 +23121,38 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 43
 ; PWR6-NEXT:    subfic 3, 3, 43
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -23210,39 +23210,39 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 44
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -23255,39 +23255,39 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 44
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -23344,38 +23344,38 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 44
 ; PWR5-NEXT:    subfic 3, 3, 44
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -23389,38 +23389,38 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 44
 ; PWR6-NEXT:    subfic 3, 3, 44
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -23478,39 +23478,39 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 45
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -23523,39 +23523,39 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 45
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -23612,38 +23612,38 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 45
 ; PWR5-NEXT:    subfic 3, 3, 45
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -23657,38 +23657,38 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 45
 ; PWR6-NEXT:    subfic 3, 3, 45
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -23746,39 +23746,39 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 46
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -23791,39 +23791,39 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 46
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -23880,38 +23880,38 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 46
 ; PWR5-NEXT:    subfic 3, 3, 46
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -23925,38 +23925,38 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 46
 ; PWR6-NEXT:    subfic 3, 3, 46
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -24014,39 +24014,39 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 47
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -24059,39 +24059,39 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 47
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -24148,38 +24148,38 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 47
 ; PWR5-NEXT:    subfic 3, 3, 47
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -24193,38 +24193,38 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 47
 ; PWR6-NEXT:    subfic 3, 3, 47
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -24282,39 +24282,39 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 48
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -24327,39 +24327,39 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 48
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -24416,38 +24416,38 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 48
 ; PWR5-NEXT:    subfic 3, 3, 48
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -24461,38 +24461,38 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 48
 ; PWR6-NEXT:    subfic 3, 3, 48
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -24550,39 +24550,39 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 49
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -24595,39 +24595,39 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 49
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -24684,38 +24684,38 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 49
 ; PWR5-NEXT:    subfic 3, 3, 49
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -24729,38 +24729,38 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 49
 ; PWR6-NEXT:    subfic 3, 3, 49
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -24818,39 +24818,39 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 50
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -24863,39 +24863,39 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 50
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -24952,38 +24952,38 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 50
 ; PWR5-NEXT:    subfic 3, 3, 50
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -24997,38 +24997,38 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 50
 ; PWR6-NEXT:    subfic 3, 3, 50
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -25086,39 +25086,39 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 51
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -25131,39 +25131,39 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 51
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -25220,38 +25220,38 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 51
 ; PWR5-NEXT:    subfic 3, 3, 51
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -25265,38 +25265,38 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 51
 ; PWR6-NEXT:    subfic 3, 3, 51
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -25354,39 +25354,39 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 52
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -25399,39 +25399,39 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 52
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -25488,38 +25488,38 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 52
 ; PWR5-NEXT:    subfic 3, 3, 52
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -25533,38 +25533,38 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 52
 ; PWR6-NEXT:    subfic 3, 3, 52
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -25622,39 +25622,39 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 53
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -25667,39 +25667,39 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 53
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -25756,38 +25756,38 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 53
 ; PWR5-NEXT:    subfic 3, 3, 53
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -25801,38 +25801,38 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 53
 ; PWR6-NEXT:    subfic 3, 3, 53
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -25890,39 +25890,39 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 54
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -25935,39 +25935,39 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 54
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -26024,38 +26024,38 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 54
 ; PWR5-NEXT:    subfic 3, 3, 54
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -26069,38 +26069,38 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 54
 ; PWR6-NEXT:    subfic 3, 3, 54
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -26158,39 +26158,39 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 55
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -26203,39 +26203,39 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 55
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -26292,38 +26292,38 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 55
 ; PWR5-NEXT:    subfic 3, 3, 55
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -26337,38 +26337,38 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 55
 ; PWR6-NEXT:    subfic 3, 3, 55
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -26426,39 +26426,39 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -26471,39 +26471,39 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -26560,38 +26560,38 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 56
 ; PWR5-NEXT:    subfic 3, 3, 56
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -26605,38 +26605,38 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 56
 ; PWR6-NEXT:    subfic 3, 3, 56
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -26694,39 +26694,39 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 57
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -26739,39 +26739,39 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 57
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -26828,38 +26828,38 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 57
 ; PWR5-NEXT:    subfic 3, 3, 57
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -26873,38 +26873,38 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 57
 ; PWR6-NEXT:    subfic 3, 3, 57
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -26962,39 +26962,39 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 58
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -27007,39 +27007,39 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 58
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -27096,38 +27096,38 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 58
 ; PWR5-NEXT:    subfic 3, 3, 58
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -27141,38 +27141,38 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 58
 ; PWR6-NEXT:    subfic 3, 3, 58
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -27230,39 +27230,39 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 59
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -27275,39 +27275,39 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 59
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -27364,38 +27364,38 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 59
 ; PWR5-NEXT:    subfic 3, 3, 59
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -27409,38 +27409,38 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 59
 ; PWR6-NEXT:    subfic 3, 3, 59
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -27498,39 +27498,39 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 60
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -27543,39 +27543,39 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 60
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -27632,38 +27632,38 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 60
 ; PWR5-NEXT:    subfic 3, 3, 60
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -27677,38 +27677,38 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 60
 ; PWR6-NEXT:    subfic 3, 3, 60
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -27766,39 +27766,39 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 61
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -27811,39 +27811,39 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 61
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -27900,38 +27900,38 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 61
 ; PWR5-NEXT:    subfic 3, 3, 61
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -27945,38 +27945,38 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 61
 ; PWR6-NEXT:    subfic 3, 3, 61
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -28034,39 +28034,39 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 62
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -28079,39 +28079,39 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 62
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
@@ -28168,38 +28168,38 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    li 5, 62
 ; PWR5-NEXT:    subfic 3, 3, 62
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -28213,38 +28213,38 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    li 5, 62
 ; PWR6-NEXT:    subfic 3, 3, 62
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -28302,39 +28302,39 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ; PWR5-NEXT:    lis 5, 21845
 ; PWR5-NEXT:    lis 6, 13107
 ; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    rotldi 8, 3, 63
+; PWR5-NEXT:    rotldi 9, 4, 63
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
+; PWR5-NEXT:    and 9, 9, 5
+; PWR5-NEXT:    and 5, 8, 5
 ; PWR5-NEXT:    ori 6, 6, 13107
 ; PWR5-NEXT:    sub 3, 3, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
+; PWR5-NEXT:    sub 4, 4, 9
+; PWR5-NEXT:    and 9, 3, 6
 ; PWR5-NEXT:    rotldi 3, 3, 62
 ; PWR5-NEXT:    and 3, 3, 6
 ; PWR5-NEXT:    lis 7, 3855
 ; PWR5-NEXT:    and 5, 4, 6
 ; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
+; PWR5-NEXT:    add 3, 9, 3
+; PWR5-NEXT:    lis 8, 257
 ; PWR5-NEXT:    ori 7, 7, 3855
 ; PWR5-NEXT:    and 4, 4, 6
 ; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
+; PWR5-NEXT:    ori 8, 8, 257
 ; PWR5-NEXT:    rldimi 7, 7, 32, 0
 ; PWR5-NEXT:    add 4, 5, 4
 ; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
+; PWR5-NEXT:    rldimi 8, 8, 32, 0
 ; PWR5-NEXT:    rldicl 5, 4, 60, 4
 ; PWR5-NEXT:    and 3, 3, 7
 ; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
+; PWR5-NEXT:    mulld 3, 3, 8
 ; PWR5-NEXT:    and 4, 4, 7
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 63
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 8
 ; PWR5-NEXT:    subc 6, 3, 5
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
 ; PWR5-NEXT:    subfe 3, 3, 3
@@ -28347,39 +28347,39 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ; PWR6-NEXT:    lis 5, 21845
 ; PWR6-NEXT:    lis 6, 13107
 ; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    rotldi 8, 3, 63
+; PWR6-NEXT:    rotldi 9, 4, 63
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
+; PWR6-NEXT:    and 9, 9, 5
+; PWR6-NEXT:    and 5, 8, 5
 ; PWR6-NEXT:    ori 6, 6, 13107
 ; PWR6-NEXT:    sub 3, 3, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
+; PWR6-NEXT:    sub 4, 4, 9
+; PWR6-NEXT:    and 9, 3, 6
 ; PWR6-NEXT:    rotldi 3, 3, 62
 ; PWR6-NEXT:    and 3, 3, 6
 ; PWR6-NEXT:    lis 7, 3855
 ; PWR6-NEXT:    and 5, 4, 6
 ; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
+; PWR6-NEXT:    add 3, 9, 3
+; PWR6-NEXT:    lis 8, 257
 ; PWR6-NEXT:    ori 7, 7, 3855
 ; PWR6-NEXT:    and 4, 4, 6
 ; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
+; PWR6-NEXT:    ori 8, 8, 257
 ; PWR6-NEXT:    rldimi 7, 7, 32, 0
 ; PWR6-NEXT:    add 4, 5, 4
 ; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
+; PWR6-NEXT:    rldimi 8, 8, 32, 0
 ; PWR6-NEXT:    rldicl 5, 4, 60, 4
 ; PWR6-NEXT:    and 3, 3, 7
 ; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
+; PWR6-NEXT:    mulld 3, 3, 8
 ; PWR6-NEXT:    and 4, 4, 7
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 63
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 8
 ; PWR6-NEXT:    subc 6, 3, 5
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
 ; PWR6-NEXT:    subfe 3, 3, 3
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index abfe6a953dd6c83..d7d171691f7c05d 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -528,16 +528,18 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-LABEL: lshr_32bytes:
 ; LE-32BIT:       # %bb.0:
 ; LE-32BIT-NEXT:    stwu 1, -112(1)
-; LE-32BIT-NEXT:    lwz 7, 0(3)
+; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
 ; LE-32BIT-NEXT:    li 6, 0
+; LE-32BIT-NEXT:    lwz 7, 0(3)
 ; LE-32BIT-NEXT:    lwz 8, 4(3)
 ; LE-32BIT-NEXT:    lwz 9, 8(3)
 ; LE-32BIT-NEXT:    lwz 10, 12(3)
 ; LE-32BIT-NEXT:    lwz 11, 16(3)
 ; LE-32BIT-NEXT:    lwz 12, 20(3)
 ; LE-32BIT-NEXT:    lwz 0, 24(3)
-; LE-32BIT-NEXT:    lwz 3, 28(3)
-; LE-32BIT-NEXT:    lwz 4, 28(4)
+; LE-32BIT-NEXT:    lwz 30, 28(3)
+; LE-32BIT-NEXT:    lwz 3, 28(4)
+; LE-32BIT-NEXT:    addi 4, 1, 52
 ; LE-32BIT-NEXT:    stw 6, 48(1)
 ; LE-32BIT-NEXT:    stw 6, 44(1)
 ; LE-32BIT-NEXT:    stw 6, 40(1)
@@ -546,16 +548,14 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 6, 28(1)
 ; LE-32BIT-NEXT:    stw 6, 24(1)
 ; LE-32BIT-NEXT:    stw 6, 20(1)
-; LE-32BIT-NEXT:    rlwinm 6, 4, 29, 27, 31
-; LE-32BIT-NEXT:    stw 3, 80(1)
-; LE-32BIT-NEXT:    addi 3, 1, 52
+; LE-32BIT-NEXT:    rlwinm 6, 3, 29, 27, 31
 ; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
-; LE-32BIT-NEXT:    sub 3, 3, 6
+; LE-32BIT-NEXT:    sub 4, 4, 6
 ; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
 ; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
 ; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
 ; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
-; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT:    stw 30, 80(1)
 ; LE-32BIT-NEXT:    stw 0, 76(1)
 ; LE-32BIT-NEXT:    stw 12, 72(1)
 ; LE-32BIT-NEXT:    stw 11, 68(1)
@@ -563,51 +563,51 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    stw 9, 60(1)
 ; LE-32BIT-NEXT:    li 9, 7
 ; LE-32BIT-NEXT:    stw 8, 56(1)
-; LE-32BIT-NEXT:    nand 9, 4, 9
+; LE-32BIT-NEXT:    nand 9, 3, 9
 ; LE-32BIT-NEXT:    stw 7, 52(1)
-; LE-32BIT-NEXT:    clrlwi 4, 4, 29
-; LE-32BIT-NEXT:    lwz 6, 4(3)
-; LE-32BIT-NEXT:    subfic 30, 4, 32
-; LE-32BIT-NEXT:    lwz 7, 8(3)
+; LE-32BIT-NEXT:    clrlwi 3, 3, 29
+; LE-32BIT-NEXT:    lwz 6, 28(4)
+; LE-32BIT-NEXT:    subfic 30, 3, 32
+; LE-32BIT-NEXT:    lwz 7, 4(4)
 ; LE-32BIT-NEXT:    clrlwi 9, 9, 27
-; LE-32BIT-NEXT:    lwz 8, 12(3)
-; LE-32BIT-NEXT:    slwi 29, 6, 1
-; LE-32BIT-NEXT:    lwz 10, 16(3)
-; LE-32BIT-NEXT:    srw 28, 7, 4
-; LE-32BIT-NEXT:    lwz 11, 20(3)
-; LE-32BIT-NEXT:    slwi 27, 8, 1
-; LE-32BIT-NEXT:    lwz 12, 24(3)
-; LE-32BIT-NEXT:    srw 26, 10, 4
-; LE-32BIT-NEXT:    lwz 0, 0(3)
-; LE-32BIT-NEXT:    srw 6, 6, 4
-; LE-32BIT-NEXT:    lwz 3, 28(3)
-; LE-32BIT-NEXT:    srw 25, 12, 4
-; LE-32BIT-NEXT:    slw 12, 12, 30
-; LE-32BIT-NEXT:    slw 7, 7, 30
-; LE-32BIT-NEXT:    srw 3, 3, 4
-; LE-32BIT-NEXT:    slw 10, 10, 30
-; LE-32BIT-NEXT:    slw 30, 0, 30
-; LE-32BIT-NEXT:    srw 8, 8, 4
-; LE-32BIT-NEXT:    srw 0, 0, 4
-; LE-32BIT-NEXT:    srw 4, 11, 4
-; LE-32BIT-NEXT:    or 3, 12, 3
-; LE-32BIT-NEXT:    stw 3, 28(5)
-; LE-32BIT-NEXT:    or 3, 10, 4
-; LE-32BIT-NEXT:    slwi 11, 11, 1
+; LE-32BIT-NEXT:    lwz 8, 8(4)
+; LE-32BIT-NEXT:    srw 6, 6, 3
+; LE-32BIT-NEXT:    lwz 10, 12(4)
+; LE-32BIT-NEXT:    slwi 29, 7, 1
+; LE-32BIT-NEXT:    lwz 11, 16(4)
+; LE-32BIT-NEXT:    srw 28, 8, 3
+; LE-32BIT-NEXT:    lwz 12, 20(4)
+; LE-32BIT-NEXT:    slwi 27, 10, 1
+; LE-32BIT-NEXT:    lwz 0, 24(4)
+; LE-32BIT-NEXT:    srw 26, 11, 3
+; LE-32BIT-NEXT:    lwz 4, 0(4)
+; LE-32BIT-NEXT:    srw 7, 7, 3
+; LE-32BIT-NEXT:    srw 25, 0, 3
+; LE-32BIT-NEXT:    slw 8, 8, 30
+; LE-32BIT-NEXT:    slw 11, 11, 30
+; LE-32BIT-NEXT:    slw 0, 0, 30
+; LE-32BIT-NEXT:    slw 30, 4, 30
+; LE-32BIT-NEXT:    srw 10, 10, 3
+; LE-32BIT-NEXT:    srw 4, 4, 3
+; LE-32BIT-NEXT:    srw 3, 12, 3
+; LE-32BIT-NEXT:    or 3, 11, 3
 ; LE-32BIT-NEXT:    stw 3, 20(5)
-; LE-32BIT-NEXT:    or 3, 7, 8
-; LE-32BIT-NEXT:    slw 29, 29, 9
+; LE-32BIT-NEXT:    or 3, 8, 10
+; LE-32BIT-NEXT:    slwi 12, 12, 1
 ; LE-32BIT-NEXT:    slw 27, 27, 9
-; LE-32BIT-NEXT:    slw 9, 11, 9
 ; LE-32BIT-NEXT:    stw 3, 12(5)
-; LE-32BIT-NEXT:    or 3, 30, 6
+; LE-32BIT-NEXT:    or 3, 30, 7
+; LE-32BIT-NEXT:    slw 29, 29, 9
+; LE-32BIT-NEXT:    slw 9, 12, 9
 ; LE-32BIT-NEXT:    stw 3, 4(5)
-; LE-32BIT-NEXT:    or 3, 25, 9
-; LE-32BIT-NEXT:    stw 3, 24(5)
 ; LE-32BIT-NEXT:    or 3, 26, 27
 ; LE-32BIT-NEXT:    stw 3, 16(5)
+; LE-32BIT-NEXT:    or 3, 25, 9
+; LE-32BIT-NEXT:    stw 4, 0(5)
+; LE-32BIT-NEXT:    or 4, 0, 6
+; LE-32BIT-NEXT:    stw 3, 24(5)
 ; LE-32BIT-NEXT:    or 3, 28, 29
-; LE-32BIT-NEXT:    stw 0, 0(5)
+; LE-32BIT-NEXT:    stw 4, 28(5)
 ; LE-32BIT-NEXT:    stw 3, 8(5)
 ; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
 ; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
@@ -755,19 +755,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    clrlwi 4, 4, 29
 ; LE-32BIT-NEXT:    subfic 0, 4, 32
 ; LE-32BIT-NEXT:    clrlwi 8, 8, 27
-; LE-32BIT-NEXT:    lwz 7, 8(6)
-; LE-32BIT-NEXT:    slw 3, 3, 4
 ; LE-32BIT-NEXT:    lwz 9, 4(6)
+; LE-32BIT-NEXT:    slw 3, 3, 4
 ; LE-32BIT-NEXT:    lwz 10, 16(6)
-; LE-32BIT-NEXT:    srwi 29, 7, 1
-; LE-32BIT-NEXT:    lwz 11, 12(6)
+; LE-32BIT-NEXT:    lwz 7, 8(6)
 ; LE-32BIT-NEXT:    slw 28, 9, 4
-; LE-32BIT-NEXT:    lwz 12, 24(6)
+; LE-32BIT-NEXT:    lwz 11, 12(6)
 ; LE-32BIT-NEXT:    srwi 27, 10, 1
+; LE-32BIT-NEXT:    lwz 12, 24(6)
+; LE-32BIT-NEXT:    srw 9, 9, 0
 ; LE-32BIT-NEXT:    lwz 30, 20(6)
-; LE-32BIT-NEXT:    slw 26, 11, 4
+; LE-32BIT-NEXT:    srwi 29, 7, 1
 ; LE-32BIT-NEXT:    lwz 6, 28(6)
-; LE-32BIT-NEXT:    srw 9, 9, 0
+; LE-32BIT-NEXT:    slw 26, 11, 4
 ; LE-32BIT-NEXT:    slw 25, 30, 4
 ; LE-32BIT-NEXT:    srw 11, 11, 0
 ; LE-32BIT-NEXT:    slw 7, 7, 4
@@ -777,20 +777,20 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    slw 6, 6, 4
 ; LE-32BIT-NEXT:    slw 4, 12, 4
 ; LE-32BIT-NEXT:    srwi 12, 12, 1
-; LE-32BIT-NEXT:    srw 29, 29, 8
 ; LE-32BIT-NEXT:    srw 27, 27, 8
-; LE-32BIT-NEXT:    srw 8, 12, 8
 ; LE-32BIT-NEXT:    or 3, 3, 9
+; LE-32BIT-NEXT:    srw 29, 29, 8
+; LE-32BIT-NEXT:    srw 8, 12, 8
 ; LE-32BIT-NEXT:    or 4, 4, 0
 ; LE-32BIT-NEXT:    stw 3, 0(5)
-; LE-32BIT-NEXT:    or 3, 25, 8
+; LE-32BIT-NEXT:    or 3, 26, 27
 ; LE-32BIT-NEXT:    stw 4, 24(5)
 ; LE-32BIT-NEXT:    or 4, 10, 30
-; LE-32BIT-NEXT:    stw 3, 20(5)
-; LE-32BIT-NEXT:    or 3, 26, 27
+; LE-32BIT-NEXT:    stw 3, 12(5)
+; LE-32BIT-NEXT:    or 3, 25, 8
 ; LE-32BIT-NEXT:    stw 4, 16(5)
 ; LE-32BIT-NEXT:    or 4, 7, 11
-; LE-32BIT-NEXT:    stw 3, 12(5)
+; LE-32BIT-NEXT:    stw 3, 20(5)
 ; LE-32BIT-NEXT:    or 3, 28, 29
 ; LE-32BIT-NEXT:    stw 6, 28(5)
 ; LE-32BIT-NEXT:    stw 4, 8(5)
@@ -967,19 +967,19 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-32BIT-NEXT:    or 3, 12, 3
 ; LE-32BIT-NEXT:    stw 3, 28(5)
 ; LE-32BIT-NEXT:    or 3, 10, 4
-; LE-32BIT-NEXT:    slwi 11, 11, 1
 ; LE-32BIT-NEXT:    stw 3, 20(5)
 ; LE-32BIT-NEXT:    or 3, 7, 8
-; LE-32BIT-NEXT:    slw 29, 29, 9
+; LE-32BIT-NEXT:    slwi 11, 11, 1
 ; LE-32BIT-NEXT:    slw 27, 27, 9
-; LE-32BIT-NEXT:    slw 9, 11, 9
 ; LE-32BIT-NEXT:    stw 3, 12(5)
 ; LE-32BIT-NEXT:    or 3, 30, 6
+; LE-32BIT-NEXT:    slw 29, 29, 9
+; LE-32BIT-NEXT:    slw 9, 11, 9
 ; LE-32BIT-NEXT:    stw 3, 4(5)
-; LE-32BIT-NEXT:    or 3, 25, 9
-; LE-32BIT-NEXT:    stw 3, 24(5)
 ; LE-32BIT-NEXT:    or 3, 26, 27
 ; LE-32BIT-NEXT:    stw 3, 16(5)
+; LE-32BIT-NEXT:    or 3, 25, 9
+; LE-32BIT-NEXT:    stw 3, 24(5)
 ; LE-32BIT-NEXT:    or 3, 28, 29
 ; LE-32BIT-NEXT:    stw 0, 0(5)
 ; LE-32BIT-NEXT:    stw 3, 8(5)
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
index 9c69fe0a6e48653..8ae48a8c5c90e8f 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -562,21 +562,20 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a1, a1, 2
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 1
-; RV32I-NEXT:    lui a6, 349525
-; RV32I-NEXT:    addi a6, a6, 1365
-; RV32I-NEXT:    and a2, a2, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a2, a2, a1
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    srli a6, a0, 8
+; RV32I-NEXT:    and a6, a6, a3
 ; RV32I-NEXT:    srli a7, a0, 24
-; RV32I-NEXT:    or a1, a1, a7
+; RV32I-NEXT:    or a6, a6, a7
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    slli t0, a1, 1
 ; RV32I-NEXT:    and a3, a0, a3
 ; RV32I-NEXT:    slli a3, a3, 8
 ; RV32I-NEXT:    slli a0, a0, 24
 ; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a6
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    and a1, a1, a4
 ; RV32I-NEXT:    and a0, a0, a4
@@ -588,11 +587,11 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a0, a0, a7
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a1, a1, a0
-; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    or a0, a2, t0
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_i64:
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index f2b7e8d26328d5b..58b6d399a4538e9 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1440,11 +1440,11 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    sltu a4, s9, s5
 ; RV32I-NEXT:    sltu a5, s8, s7
 ; RV32I-NEXT:    add a5, s6, a5
-; RV32I-NEXT:    add a4, a5, a4
 ; RV32I-NEXT:    add a1, a1, s3
 ; RV32I-NEXT:    sltu a0, s2, a0
 ; RV32I-NEXT:    add a0, a1, a0
-; RV32I-NEXT:    add a0, a4, a0
+; RV32I-NEXT:    add a0, a5, a0
+; RV32I-NEXT:    add a0, a0, a4
 ; RV32I-NEXT:    add a1, a0, a3
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 4852850f234ba6f..15d1aee7e67411a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1074,29 +1074,29 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV32-NEXT:    .cfi_offset fs1, -16
 ; RV32-NEXT:    sw a6, 8(sp)
 ; RV32-NEXT:    sw a7, 12(sp)
-; RV32-NEXT:    fld ft4, 8(sp)
+; RV32-NEXT:    fld ft6, 8(sp)
 ; RV32-NEXT:    sw a4, 8(sp)
 ; RV32-NEXT:    sw a5, 12(sp)
-; RV32-NEXT:    fld ft5, 8(sp)
+; RV32-NEXT:    fld ft7, 8(sp)
 ; RV32-NEXT:    sw a2, 8(sp)
 ; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    fld ft6, 8(sp)
+; RV32-NEXT:    fld ft8, 8(sp)
 ; RV32-NEXT:    sw a0, 8(sp)
 ; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    fld ft7, 8(sp)
+; RV32-NEXT:    fld ft9, 8(sp)
 ; RV32-NEXT:    fld ft0, 184(sp)
 ; RV32-NEXT:    fld ft1, 168(sp)
 ; RV32-NEXT:    fld ft2, 152(sp)
-; RV32-NEXT:    fld ft3, 136(sp)
-; RV32-NEXT:    fld ft8, 120(sp)
-; RV32-NEXT:    fld ft9, 104(sp)
-; RV32-NEXT:    fld ft10, 72(sp)
-; RV32-NEXT:    fld ft11, 88(sp)
-; RV32-NEXT:    fld fs0, 56(sp)
+; RV32-NEXT:    fld ft4, 136(sp)
+; RV32-NEXT:    fld ft5, 120(sp)
+; RV32-NEXT:    fld ft10, 104(sp)
+; RV32-NEXT:    fld ft11, 72(sp)
+; RV32-NEXT:    fld fs0, 88(sp)
+; RV32-NEXT:    fld ft3, 56(sp)
 ; RV32-NEXT:    fld fs1, 40(sp)
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vfmv.v.f v8, ft7
-; RV32-NEXT:    vfslide1down.vf v12, v8, ft6
+; RV32-NEXT:    vfmv.v.f v8, ft9
+; RV32-NEXT:    vfslide1down.vf v12, v8, ft8
 ; RV32-NEXT:    vfmv.v.f v8, fa2
 ; RV32-NEXT:    vfslide1down.vf v9, v8, fa3
 ; RV32-NEXT:    vfmv.v.f v8, fa0
@@ -1105,38 +1105,38 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV32-NEXT:    vfslide1down.vf v10, v10, fa5
 ; RV32-NEXT:    vfmv.v.f v11, fa6
 ; RV32-NEXT:    vfslide1down.vf v11, v11, fa7
+; RV32-NEXT:    vfmv.v.f v13, ft7
 ; RV32-NEXT:    addi a0, sp, 32
 ; RV32-NEXT:    vlse64.v v14, (a0), zero
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    addi a0, sp, 80
 ; RV32-NEXT:    vlse64.v v15, (a0), zero
-; RV32-NEXT:    vfmv.v.f v13, ft5
-; RV32-NEXT:    vfslide1down.vf v13, v13, ft4
+; RV32-NEXT:    vfslide1down.vf v13, v13, ft6
 ; RV32-NEXT:    vfslide1down.vf v14, v14, fs1
-; RV32-NEXT:    vfslide1down.vf v15, v15, fs0
-; RV32-NEXT:    addi a0, sp, 80
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    addi a0, sp, 64
-; RV32-NEXT:    vlse64.v v18, (a0), zero
-; RV32-NEXT:    addi a0, sp, 96
-; RV32-NEXT:    vlse64.v v19, (a0), zero
-; RV32-NEXT:    addi a0, sp, 112
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    vfslide1down.vf v17, v16, ft11
-; RV32-NEXT:    vfslide1down.vf v16, v18, ft10
-; RV32-NEXT:    vfslide1down.vf v18, v19, ft9
-; RV32-NEXT:    vfslide1down.vf v19, v20, ft8
-; RV32-NEXT:    addi a0, sp, 128
-; RV32-NEXT:    vlse64.v v20, (a0), zero
-; RV32-NEXT:    addi a0, sp, 144
-; RV32-NEXT:    vlse64.v v21, (a0), zero
-; RV32-NEXT:    addi a0, sp, 160
-; RV32-NEXT:    vlse64.v v22, (a0), zero
-; RV32-NEXT:    addi a0, sp, 176
-; RV32-NEXT:    vlse64.v v23, (a0), zero
-; RV32-NEXT:    vfslide1down.vf v20, v20, ft3
-; RV32-NEXT:    vfslide1down.vf v21, v21, ft2
+; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    vfslide1down.vf v17, v15, fs0
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    vlse64.v v15, (a1), zero
+; RV32-NEXT:    addi a1, sp, 96
+; RV32-NEXT:    vlse64.v v18, (a1), zero
+; RV32-NEXT:    addi a1, sp, 112
+; RV32-NEXT:    vlse64.v v19, (a1), zero
+; RV32-NEXT:    addi a1, sp, 128
+; RV32-NEXT:    vlse64.v v20, (a1), zero
+; RV32-NEXT:    vfslide1down.vf v16, v15, ft11
+; RV32-NEXT:    vfslide1down.vf v18, v18, ft10
+; RV32-NEXT:    vfslide1down.vf v19, v19, ft5
+; RV32-NEXT:    vfslide1down.vf v20, v20, ft4
+; RV32-NEXT:    addi a1, sp, 144
+; RV32-NEXT:    vlse64.v v15, (a1), zero
+; RV32-NEXT:    addi a1, sp, 160
+; RV32-NEXT:    vlse64.v v22, (a1), zero
+; RV32-NEXT:    addi a1, sp, 176
+; RV32-NEXT:    vlse64.v v23, (a1), zero
+; RV32-NEXT:    vlse64.v v24, (a0), zero
+; RV32-NEXT:    vfslide1down.vf v21, v15, ft2
 ; RV32-NEXT:    vfslide1down.vf v22, v22, ft1
 ; RV32-NEXT:    vfslide1down.vf v23, v23, ft0
+; RV32-NEXT:    vfslide1down.vf v15, v24, ft3
 ; RV32-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
 ; RV32-NEXT:    fld fs1, 16(sp) # 8-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
@@ -1144,32 +1144,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ;
 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
-; RV64-NEXT:    fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs1, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs2, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs3, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    fsd fs0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs1, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset fs0, -8
 ; RV64-NEXT:    .cfi_offset fs1, -16
-; RV64-NEXT:    .cfi_offset fs2, -24
-; RV64-NEXT:    .cfi_offset fs3, -32
-; RV64-NEXT:    fmv.d.x ft4, a7
-; RV64-NEXT:    fmv.d.x ft5, a6
-; RV64-NEXT:    fmv.d.x ft6, a5
-; RV64-NEXT:    fmv.d.x ft7, a4
-; RV64-NEXT:    fmv.d.x ft8, a3
-; RV64-NEXT:    fmv.d.x ft9, a2
-; RV64-NEXT:    fmv.d.x ft10, a1
-; RV64-NEXT:    fmv.d.x ft11, a0
-; RV64-NEXT:    fld ft0, 152(sp)
-; RV64-NEXT:    fld ft1, 136(sp)
-; RV64-NEXT:    fld ft2, 120(sp)
-; RV64-NEXT:    fld ft3, 104(sp)
-; RV64-NEXT:    fld fs0, 88(sp)
-; RV64-NEXT:    fld fs1, 72(sp)
-; RV64-NEXT:    fld fs2, 40(sp)
-; RV64-NEXT:    fld fs3, 56(sp)
+; RV64-NEXT:    fmv.d.x ft4, a5
+; RV64-NEXT:    fmv.d.x ft5, a4
+; RV64-NEXT:    fmv.d.x ft6, a3
+; RV64-NEXT:    fmv.d.x ft7, a2
+; RV64-NEXT:    fmv.d.x ft8, a1
+; RV64-NEXT:    fmv.d.x ft9, a0
+; RV64-NEXT:    fld ft0, 136(sp)
+; RV64-NEXT:    fld ft1, 120(sp)
+; RV64-NEXT:    fld ft2, 104(sp)
+; RV64-NEXT:    fld ft3, 88(sp)
+; RV64-NEXT:    fld ft10, 72(sp)
+; RV64-NEXT:    fld ft11, 56(sp)
+; RV64-NEXT:    fld fs0, 24(sp)
+; RV64-NEXT:    fld fs1, 40(sp)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vfmv.v.f v8, fa2
 ; RV64-NEXT:    vfslide1down.vf v9, v8, fa3
@@ -1179,43 +1173,43 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    vfslide1down.vf v10, v10, fa5
 ; RV64-NEXT:    vfmv.v.f v11, fa6
 ; RV64-NEXT:    vfslide1down.vf v11, v11, fa7
-; RV64-NEXT:    vfmv.v.f v12, ft11
-; RV64-NEXT:    vfslide1down.vf v12, v12, ft10
-; RV64-NEXT:    vfmv.v.f v13, ft9
-; RV64-NEXT:    vfslide1down.vf v13, v13, ft8
-; RV64-NEXT:    vfmv.v.f v14, ft7
-; RV64-NEXT:    vfslide1down.vf v14, v14, ft6
-; RV64-NEXT:    vfmv.v.f v15, ft5
-; RV64-NEXT:    vfslide1down.vf v15, v15, ft4
-; RV64-NEXT:    addi a0, sp, 48
-; RV64-NEXT:    vlse64.v v16, (a0), zero
+; RV64-NEXT:    vfmv.v.f v12, ft9
+; RV64-NEXT:    vfslide1down.vf v12, v12, ft8
+; RV64-NEXT:    vfmv.v.f v13, ft7
+; RV64-NEXT:    vfslide1down.vf v13, v13, ft6
+; RV64-NEXT:    vfmv.v.f v14, ft5
 ; RV64-NEXT:    addi a0, sp, 32
-; RV64-NEXT:    vlse64.v v18, (a0), zero
+; RV64-NEXT:    vlse64.v v15, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v14, v14, ft4
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vlse64.v v16, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v17, v15, fs1
+; RV64-NEXT:    addi a0, sp, 48
+; RV64-NEXT:    vlse64.v v15, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v16, v16, fs0
 ; RV64-NEXT:    addi a0, sp, 64
 ; RV64-NEXT:    vlse64.v v19, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v18, v15, ft11
 ; RV64-NEXT:    addi a0, sp, 80
-; RV64-NEXT:    vlse64.v v20, (a0), zero
-; RV64-NEXT:    vfslide1down.vf v17, v16, fs3
-; RV64-NEXT:    vfslide1down.vf v16, v18, fs2
-; RV64-NEXT:    vfslide1down.vf v18, v19, fs1
-; RV64-NEXT:    vfslide1down.vf v19, v20, fs0
+; RV64-NEXT:    vlse64.v v15, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v19, v19, ft10
 ; RV64-NEXT:    addi a0, sp, 96
-; RV64-NEXT:    vlse64.v v20, (a0), zero
-; RV64-NEXT:    addi a0, sp, 112
 ; RV64-NEXT:    vlse64.v v21, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v20, v15, ft3
+; RV64-NEXT:    addi a0, sp, 112
+; RV64-NEXT:    vlse64.v v15, (a0), zero
+; RV64-NEXT:    vfslide1down.vf v21, v21, ft2
 ; RV64-NEXT:    addi a0, sp, 128
-; RV64-NEXT:    vlse64.v v22, (a0), zero
-; RV64-NEXT:    addi a0, sp, 144
 ; RV64-NEXT:    vlse64.v v23, (a0), zero
-; RV64-NEXT:    vfslide1down.vf v20, v20, ft3
-; RV64-NEXT:    vfslide1down.vf v21, v21, ft2
-; RV64-NEXT:    vfslide1down.vf v22, v22, ft1
+; RV64-NEXT:    vfslide1down.vf v22, v15, ft1
+; RV64-NEXT:    fmv.d.x fa5, a7
+; RV64-NEXT:    fmv.d.x fa4, a6
 ; RV64-NEXT:    vfslide1down.vf v23, v23, ft0
-; RV64-NEXT:    fld fs0, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs1, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs2, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs3, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    vfmv.v.f v15, fa4
+; RV64-NEXT:    vfslide1down.vf v15, v15, fa5
+; RV64-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs1, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v0 = insertelement <32 x double> poison, double %e0, i64 0
   %v1 = insertelement <32 x double> %v0, double %e1, i64 1
@@ -1394,6 +1388,3 @@ define <2 x double> @vid_step2_v2f64() {
 ; CHECK-NEXT:    ret
   ret <2 x double> <double 0.0, double 2.0>
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
index e5bbbd661e6a1df..de185a17e8991fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
@@ -124,19 +124,19 @@ define i8 @explode_16xi8(<16 x i8> %v) {
 ; CHECK-NEXT:    vredxor.vs v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s t6, v8
 ; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    add a0, t6, a0
-; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a2, a2, a4
 ; CHECK-NEXT:    add a0, a0, a2
-; CHECK-NEXT:    add a5, a5, a6
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a5, a5, t0
-; CHECK-NEXT:    add a0, a0, a5
-; CHECK-NEXT:    add t1, t1, t2
-; CHECK-NEXT:    add t1, t1, t3
-; CHECK-NEXT:    add t1, t1, t4
-; CHECK-NEXT:    add t1, t1, t5
-; CHECK-NEXT:    add a0, a0, t1
+; CHECK-NEXT:    add a0, t6, a0
+; CHECK-NEXT:    add a3, a3, a4
+; CHECK-NEXT:    add a3, a3, a5
+; CHECK-NEXT:    add a3, a3, a6
+; CHECK-NEXT:    add a0, a0, a3
+; CHECK-NEXT:    add a7, a7, t0
+; CHECK-NEXT:    add a7, a7, t1
+; CHECK-NEXT:    add a7, a7, t2
+; CHECK-NEXT:    add a0, a0, a7
+; CHECK-NEXT:    add t3, t3, t4
+; CHECK-NEXT:    add t3, t3, t5
+; CHECK-NEXT:    add a0, a0, t3
 ; CHECK-NEXT:    ret
   %e0 = extractelement <16 x i8> %v, i32 0
   %e1 = extractelement <16 x i8> %v, i32 1
@@ -295,19 +295,19 @@ define i16 @explode_16xi16(<16 x i16> %v) {
 ; CHECK-NEXT:    vredxor.vs v8, v8, v9
 ; CHECK-NEXT:    vmv.x.s t6, v8
 ; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    add a0, t6, a0
-; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a2, a2, a4
 ; CHECK-NEXT:    add a0, a0, a2
-; CHECK-NEXT:    add a5, a5, a6
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a5, a5, t0
-; CHECK-NEXT:    add a0, a0, a5
-; CHECK-NEXT:    add t1, t1, t2
-; CHECK-NEXT:    add t1, t1, t3
-; CHECK-NEXT:    add t1, t1, t4
-; CHECK-NEXT:    add t1, t1, t5
-; CHECK-NEXT:    add a0, a0, t1
+; CHECK-NEXT:    add a0, t6, a0
+; CHECK-NEXT:    add a3, a3, a4
+; CHECK-NEXT:    add a3, a3, a5
+; CHECK-NEXT:    add a3, a3, a6
+; CHECK-NEXT:    add a0, a0, a3
+; CHECK-NEXT:    add a7, a7, t0
+; CHECK-NEXT:    add a7, a7, t1
+; CHECK-NEXT:    add a7, a7, t2
+; CHECK-NEXT:    add a0, a0, a7
+; CHECK-NEXT:    add t3, t3, t4
+; CHECK-NEXT:    add t3, t3, t5
+; CHECK-NEXT:    add a0, a0, t3
 ; CHECK-NEXT:    ret
   %e0 = extractelement <16 x i16> %v, i32 0
   %e1 = extractelement <16 x i16> %v, i32 1
@@ -1119,19 +1119,19 @@ define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
 ; RV32-NEXT:    vredxor.vs v8, v8, v9
 ; RV32-NEXT:    vmv.x.s t6, v8
 ; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, t6, a0
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    add a2, a2, a4
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    add a5, a5, a7
-; RV32-NEXT:    add a5, a5, t0
-; RV32-NEXT:    add a0, a0, a5
-; RV32-NEXT:    add t1, t1, t2
-; RV32-NEXT:    add t1, t1, t3
-; RV32-NEXT:    add t1, t1, t4
-; RV32-NEXT:    add t1, t1, t5
-; RV32-NEXT:    add a0, a0, t1
+; RV32-NEXT:    add a0, t6, a0
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add a3, a3, a5
+; RV32-NEXT:    add a3, a3, a6
+; RV32-NEXT:    add a0, a0, a3
+; RV32-NEXT:    add a7, a7, t0
+; RV32-NEXT:    add a7, a7, t1
+; RV32-NEXT:    add a7, a7, t2
+; RV32-NEXT:    add a0, a0, a7
+; RV32-NEXT:    add t3, t3, t4
+; RV32-NEXT:    add t3, t3, t5
+; RV32-NEXT:    add a0, a0, t3
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: explode_16xi32_exact_vlen:
@@ -1167,19 +1167,19 @@ define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
 ; RV64-NEXT:    vredxor.vs v8, v8, v9
 ; RV64-NEXT:    vmv.x.s t6, v8
 ; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    add a0, t6, a0
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    add a2, a2, a4
 ; RV64-NEXT:    add a0, a0, a2
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    add a5, a5, a7
-; RV64-NEXT:    add a5, a5, t0
-; RV64-NEXT:    add a0, a0, a5
-; RV64-NEXT:    add t1, t1, t2
-; RV64-NEXT:    add t1, t1, t3
-; RV64-NEXT:    add t1, t1, t4
-; RV64-NEXT:    add t1, t1, t5
-; RV64-NEXT:    addw a0, a0, t1
+; RV64-NEXT:    add a0, t6, a0
+; RV64-NEXT:    add a3, a3, a4
+; RV64-NEXT:    add a3, a3, a5
+; RV64-NEXT:    add a3, a3, a6
+; RV64-NEXT:    add a0, a0, a3
+; RV64-NEXT:    add a7, a7, t0
+; RV64-NEXT:    add a7, a7, t1
+; RV64-NEXT:    add a7, a7, t2
+; RV64-NEXT:    add a0, a0, a7
+; RV64-NEXT:    add t3, t3, t4
+; RV64-NEXT:    add t3, t3, t5
+; RV64-NEXT:    addw a0, a0, t3
 ; RV64-NEXT:    ret
   %e0 = extractelement <16 x i32> %v, i32 0
   %e1 = extractelement <16 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index 5574d12d2d5dd8d..2a4d400591f3038 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -324,32 +324,32 @@ define <16 x i64> @stepvector_v16i64() {
 ; RV32LMULMAX1-NEXT:    vsext.vf4 v8, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 48
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 2
-; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
+; RV32LMULMAX1-NEXT:    vmv.s.x v16, a0
 ; RV32LMULMAX1-NEXT:    lui a0, 80
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 4
-; RV32LMULMAX1-NEXT:    vmv.s.x v11, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v11
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 112
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 6
-; RV32LMULMAX1-NEXT:    vmv.s.x v12, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v12
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 144
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 8
-; RV32LMULMAX1-NEXT:    vmv.s.x v13, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v12, v13
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v12, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 176
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 10
-; RV32LMULMAX1-NEXT:    vmv.s.x v14, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v13, v14
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v13, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 208
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 12
-; RV32LMULMAX1-NEXT:    vmv.s.x v15, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v14, v15
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v14, v9
 ; RV32LMULMAX1-NEXT:    lui a0, 240
 ; RV32LMULMAX1-NEXT:    addi a0, a0, 14
-; RV32LMULMAX1-NEXT:    vmv.s.x v16, a0
-; RV32LMULMAX1-NEXT:    vsext.vf4 v15, v16
+; RV32LMULMAX1-NEXT:    vmv.s.x v9, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v15, v9
+; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v16
 ; RV32LMULMAX1-NEXT:    ret
 ;
 ; RV64LMULMAX1-LABEL: stepvector_v16i64:
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 3335ca3a34b6c6d..cdc6d9e2bceb303 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -458,32 +458,32 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV32IM-NEXT:    add a6, a6, a7
 ; RV32IM-NEXT:    li a7, 95
 ; RV32IM-NEXT:    mul t0, a6, a7
-; RV32IM-NEXT:    mulh t1, a1, a5
-; RV32IM-NEXT:    add t1, t1, a1
-; RV32IM-NEXT:    srli t2, t1, 31
-; RV32IM-NEXT:    srai t1, t1, 6
-; RV32IM-NEXT:    add t1, t1, t2
-; RV32IM-NEXT:    mul t2, t1, a7
-; RV32IM-NEXT:    mulh t3, a3, a5
-; RV32IM-NEXT:    add t3, t3, a3
-; RV32IM-NEXT:    srli t4, t3, 31
-; RV32IM-NEXT:    srai t3, t3, 6
-; RV32IM-NEXT:    add t3, t3, t4
-; RV32IM-NEXT:    mul t4, t3, a7
+; RV32IM-NEXT:    sub a4, a4, t0
+; RV32IM-NEXT:    mulh t0, a1, a5
+; RV32IM-NEXT:    add t0, t0, a1
+; RV32IM-NEXT:    srli t1, t0, 31
+; RV32IM-NEXT:    srai t0, t0, 6
+; RV32IM-NEXT:    add t0, t0, t1
+; RV32IM-NEXT:    mul t1, t0, a7
+; RV32IM-NEXT:    mulh t2, a3, a5
+; RV32IM-NEXT:    add t2, t2, a3
+; RV32IM-NEXT:    srli t3, t2, 31
+; RV32IM-NEXT:    srai t2, t2, 6
+; RV32IM-NEXT:    add t2, t2, t3
+; RV32IM-NEXT:    mul t3, t2, a7
 ; RV32IM-NEXT:    mulh a5, a2, a5
 ; RV32IM-NEXT:    add a5, a5, a2
-; RV32IM-NEXT:    srli t5, a5, 31
+; RV32IM-NEXT:    srli t4, a5, 31
 ; RV32IM-NEXT:    srai a5, a5, 6
-; RV32IM-NEXT:    add a5, a5, t5
+; RV32IM-NEXT:    add a5, a5, t4
 ; RV32IM-NEXT:    mul a7, a5, a7
 ; RV32IM-NEXT:    add a2, a2, a5
 ; RV32IM-NEXT:    sub a2, a2, a7
-; RV32IM-NEXT:    add a3, a3, t3
-; RV32IM-NEXT:    sub a3, a3, t4
-; RV32IM-NEXT:    add a1, a1, t1
-; RV32IM-NEXT:    sub a1, a1, t2
+; RV32IM-NEXT:    add a3, a3, t2
+; RV32IM-NEXT:    sub a3, a3, t3
+; RV32IM-NEXT:    add a1, a1, t0
+; RV32IM-NEXT:    sub a1, a1, t1
 ; RV32IM-NEXT:    add a4, a4, a6
-; RV32IM-NEXT:    sub a4, a4, t0
 ; RV32IM-NEXT:    sh a4, 6(a0)
 ; RV32IM-NEXT:    sh a1, 4(a0)
 ; RV32IM-NEXT:    sh a3, 2(a0)
@@ -575,32 +575,32 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    add a6, a6, a7
 ; RV64IM-NEXT:    li a7, 95
 ; RV64IM-NEXT:    mul t0, a6, a7
-; RV64IM-NEXT:    mulh t1, a1, a3
-; RV64IM-NEXT:    add t1, t1, a1
-; RV64IM-NEXT:    srli t2, t1, 63
-; RV64IM-NEXT:    srai t1, t1, 6
-; RV64IM-NEXT:    add t1, t1, t2
-; RV64IM-NEXT:    mul t2, t1, a7
-; RV64IM-NEXT:    mulh t3, a5, a3
-; RV64IM-NEXT:    add t3, t3, a5
-; RV64IM-NEXT:    srli t4, t3, 63
-; RV64IM-NEXT:    srai t3, t3, 6
-; RV64IM-NEXT:    add t3, t3, t4
-; RV64IM-NEXT:    mul t4, t3, a7
+; RV64IM-NEXT:    subw a2, a2, t0
+; RV64IM-NEXT:    mulh t0, a1, a3
+; RV64IM-NEXT:    add t0, t0, a1
+; RV64IM-NEXT:    srli t1, t0, 63
+; RV64IM-NEXT:    srai t0, t0, 6
+; RV64IM-NEXT:    add t0, t0, t1
+; RV64IM-NEXT:    mul t1, t0, a7
+; RV64IM-NEXT:    mulh t2, a5, a3
+; RV64IM-NEXT:    add t2, t2, a5
+; RV64IM-NEXT:    srli t3, t2, 63
+; RV64IM-NEXT:    srai t2, t2, 6
+; RV64IM-NEXT:    add t2, t2, t3
+; RV64IM-NEXT:    mul t3, t2, a7
 ; RV64IM-NEXT:    mulh a3, a4, a3
 ; RV64IM-NEXT:    add a3, a3, a4
-; RV64IM-NEXT:    srli t5, a3, 63
+; RV64IM-NEXT:    srli t4, a3, 63
 ; RV64IM-NEXT:    srai a3, a3, 6
-; RV64IM-NEXT:    add a3, a3, t5
+; RV64IM-NEXT:    add a3, a3, t4
 ; RV64IM-NEXT:    mul a7, a3, a7
 ; RV64IM-NEXT:    add a3, a4, a3
 ; RV64IM-NEXT:    subw a3, a3, a7
-; RV64IM-NEXT:    add a5, a5, t3
-; RV64IM-NEXT:    subw a4, a5, t4
-; RV64IM-NEXT:    add a1, a1, t1
-; RV64IM-NEXT:    subw a1, a1, t2
+; RV64IM-NEXT:    add a5, a5, t2
+; RV64IM-NEXT:    subw a4, a5, t3
+; RV64IM-NEXT:    add a1, a1, t0
+; RV64IM-NEXT:    subw a1, a1, t1
 ; RV64IM-NEXT:    add a2, a2, a6
-; RV64IM-NEXT:    subw a2, a2, t0
 ; RV64IM-NEXT:    sh a2, 6(a0)
 ; RV64IM-NEXT:    sh a1, 4(a0)
 ; RV64IM-NEXT:    sh a4, 2(a0)
diff --git a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
index c5f61b7fcdde551..e01463d1a1993a4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
@@ -224,30 +224,30 @@ define arm_aapcs_vfpcc <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b,
 ; CHECK-NEXT:    vldr s31, [sp, #188]
 ; CHECK-NEXT:    vmov.f32 s10, s4
 ; CHECK-NEXT:    vldr s30, [sp, #180]
-; CHECK-NEXT:    vmov.f32 s21, s15
-; CHECK-NEXT:    vldr s29, [sp, #172]
 ; CHECK-NEXT:    vmov.f32 s5, s3
-; CHECK-NEXT:    vldr s28, [sp, #164]
+; CHECK-NEXT:    vldr s29, [sp, #172]
 ; CHECK-NEXT:    vmov.f32 s4, s1
+; CHECK-NEXT:    vldr s28, [sp, #164]
 ; CHECK-NEXT:    vmov.f32 s24, s9
 ; CHECK-NEXT:    vmov.f32 s16, s12
 ; CHECK-NEXT:    vstrw.32 q6, [sp, #32] @ 16-byte Spill
 ; CHECK-NEXT:    vmov.f32 s12, s8
 ; CHECK-NEXT:    vldr s27, [sp, #184]
-; CHECK-NEXT:    vmov.f32 s17, s14
+; CHECK-NEXT:    vmov.f32 s21, s15
 ; CHECK-NEXT:    vldr s26, [sp, #176]
 ; CHECK-NEXT:    vmov.f32 s9, s2
 ; CHECK-NEXT:    vldr s25, [sp, #168]
 ; CHECK-NEXT:    vmov.f32 s8, s0
 ; CHECK-NEXT:    vmul.f32 q0, q5, q1
+; CHECK-NEXT:    vmov.f32 s17, s14
 ; CHECK-NEXT:    vmul.f32 q1, q4, q1
 ; CHECK-NEXT:    vneg.f32 q0, q0
 ; CHECK-NEXT:    vldr s24, [sp, #160]
 ; CHECK-NEXT:    vfma.f32 q1, q5, q2
 ; CHECK-NEXT:    vstrw.32 q0, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT:    vstrw.32 q3, [sp, #48] @ 16-byte Spill
 ; CHECK-NEXT:    vsub.f32 q6, q6, q1
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    vstrw.32 q3, [sp, #48] @ 16-byte Spill
 ; CHECK-NEXT:    vldr s13, [sp, #156]
 ; CHECK-NEXT:    vfma.f32 q1, q4, q2
 ; CHECK-NEXT:    vldr s12, [sp, #148]
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 94210d795867a0f..ea978fe873917aa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -628,26 +628,26 @@ define arm_aapcs_vfpcc <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    vcvt.s32.f16 s7, s2
 ; CHECK-NEXT:    vcvt.s32.f16 s14, s0
 ; CHECK-NEXT:    vmov r1, s5
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vmov r2, s7
 ; CHECK-NEXT:    vcvt.s32.f16 s8, s1
+; CHECK-NEXT:    vmov r2, s7
+; CHECK-NEXT:    vmovx.f16 s4, s1
 ; CHECK-NEXT:    vmov q4[2], q4[0], r2, r1
 ; CHECK-NEXT:    vmov r1, s12
 ; CHECK-NEXT:    vmov r2, s14
-; CHECK-NEXT:    vcvt.s32.f16 s4, s4
-; CHECK-NEXT:    vmov q4[3], q4[1], r2, r1
 ; CHECK-NEXT:    vcvt.s32.f16 s6, s6
+; CHECK-NEXT:    vmov q4[3], q4[1], r2, r1
 ; CHECK-NEXT:    vmov r1, s8
-; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    vmov r2, s10
-; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vcvt.s32.f16 s4, s4
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r1
 ; CHECK-NEXT:    vmov r1, s4
 ; CHECK-NEXT:    vmov r2, s6
-; CHECK-NEXT:    vmax.s32 q3, q4, q0
+; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r1
-; CHECK-NEXT:    vstrh.32 q3, [r0, #8]
+; CHECK-NEXT:    vmax.s32 q3, q4, q0
+; CHECK-NEXT:    mov r0, sp
 ; CHECK-NEXT:    vmax.s32 q0, q2, q0
+; CHECK-NEXT:    vstrh.32 q3, [r0, #8]
 ; CHECK-NEXT:    vstrh.32 q0, [r0]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    add sp, #16
@@ -1768,26 +1768,26 @@ define arm_aapcs_vfpcc <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    vcvt.s32.f16 s7, s2
 ; CHECK-NEXT:    vcvt.s32.f16 s14, s0
 ; CHECK-NEXT:    vmov r1, s5
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vmov r2, s7
 ; CHECK-NEXT:    vcvt.s32.f16 s8, s1
+; CHECK-NEXT:    vmov r2, s7
+; CHECK-NEXT:    vmovx.f16 s4, s1
 ; CHECK-NEXT:    vmov q4[2], q4[0], r2, r1
 ; CHECK-NEXT:    vmov r1, s12
 ; CHECK-NEXT:    vmov r2, s14
-; CHECK-NEXT:    vcvt.s32.f16 s4, s4
-; CHECK-NEXT:    vmov q4[3], q4[1], r2, r1
 ; CHECK-NEXT:    vcvt.s32.f16 s6, s6
+; CHECK-NEXT:    vmov q4[3], q4[1], r2, r1
 ; CHECK-NEXT:    vmov r1, s8
-; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    vmov r2, s10
-; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vcvt.s32.f16 s4, s4
 ; CHECK-NEXT:    vmov q2[2], q2[0], r2, r1
 ; CHECK-NEXT:    vmov r1, s4
 ; CHECK-NEXT:    vmov r2, s6
-; CHECK-NEXT:    vmax.s32 q3, q4, q0
+; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    vmov q2[3], q2[1], r2, r1
-; CHECK-NEXT:    vstrh.32 q3, [r0, #8]
+; CHECK-NEXT:    vmax.s32 q3, q4, q0
+; CHECK-NEXT:    mov r0, sp
 ; CHECK-NEXT:    vmax.s32 q0, q2, q0
+; CHECK-NEXT:    vstrh.32 q3, [r0, #8]
 ; CHECK-NEXT:    vstrh.32 q0, [r0]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    add sp, #16
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 3ca01cfa3a8f773..523fe9503272c6d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -4258,14 +4258,14 @@ define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-NEXT:    .vsave {d8}
 ; CHECK-NEXT:    vpush {d8}
 ; CHECK-NEXT:    vcvtb.f32.f16 s15, s0
-; CHECK-NEXT:    vmov.f32 s5, #-1.000000e+00
 ; CHECK-NEXT:    vldr s7, .LCPI42_0
+; CHECK-NEXT:    vmov.f32 s5, #-1.000000e+00
 ; CHECK-NEXT:    vmaxnm.f32 s16, s15, s5
+; CHECK-NEXT:    vminnm.f32 s16, s16, s7
 ; CHECK-NEXT:    vcvtt.f32.f16 s12, s2
+; CHECK-NEXT:    vcvt.s32.f32 s16, s16
 ; CHECK-NEXT:    vcvtt.f32.f16 s9, s1
-; CHECK-NEXT:    vminnm.f32 s16, s16, s7
 ; CHECK-NEXT:    vcvtt.f32.f16 s4, s3
-; CHECK-NEXT:    vcvt.s32.f32 s16, s16
 ; CHECK-NEXT:    vcvtb.f32.f16 s8, s3
 ; CHECK-NEXT:    vcvtb.f32.f16 s2, s2
 ; CHECK-NEXT:    vcvtb.f32.f16 s1, s1
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index 8ea12bd1fc0deb4..9302f705d2825ab 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -3352,8 +3352,8 @@ define arm_aapcs_vfpcc <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
 ; CHECK-NEXT:    vcvtt.f32.f16 s8, s3
 ; CHECK-NEXT:    vcvtb.f32.f16 s10, s3
 ; CHECK-NEXT:    vcvtb.f32.f16 s3, s0
-; CHECK-NEXT:    vmov.f32 s6, #1.000000e+00
 ; CHECK-NEXT:    vmaxnm.f32 s3, s3, s4
+; CHECK-NEXT:    vmov.f32 s6, #1.000000e+00
 ; CHECK-NEXT:    vminnm.f32 s3, s3, s6
 ; CHECK-NEXT:    vcvtt.f32.f16 s0, s0
 ; CHECK-NEXT:    vcvt.u32.f32 s3, s3
diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
index d2f79fcd5fd9826..2c4ddf140c23d1a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
@@ -165,21 +165,21 @@ define dso_local i32 @e() #0 {
 ; CHECK-NEXT:    vmov q4, q7
 ; CHECK-NEXT:    vmov.32 q0[0], r2
 ; CHECK-NEXT:    vmov.32 q7[1], r2
-; CHECK-NEXT:    vmov s21, r2
-; CHECK-NEXT:    movs r1, #64
 ; CHECK-NEXT:    vmov.f32 s20, s12
-; CHECK-NEXT:    str r0, [sp, #40]
+; CHECK-NEXT:    movs r1, #64
 ; CHECK-NEXT:    vmov.f32 s22, s13
-; CHECK-NEXT:    str r6, [r0]
+; CHECK-NEXT:    str r0, [sp, #40]
 ; CHECK-NEXT:    vmov.f32 s23, s15
+; CHECK-NEXT:    str r6, [r0]
+; CHECK-NEXT:    vmov s21, r2
 ; CHECK-NEXT:    str r0, [r0]
 ; CHECK-NEXT:    vstrw.32 q5, [r0]
 ; CHECK-NEXT:    vstrw.32 q7, [r0]
 ; CHECK-NEXT:    vstrw.32 q0, [r0]
 ; CHECK-NEXT:    vstrw.32 q6, [r0]
-; CHECK-NEXT:    mov.w r8, #0
 ; CHECK-NEXT:    vmov q1[2], q1[0], r4, r3
 ; CHECK-NEXT:    vmov q2[2], q2[0], r3, r3
+; CHECK-NEXT:    mov.w r8, #0
 ; CHECK-NEXT:    mov.w r12, #4
 ; CHECK-NEXT:    vmov q1[3], q1[1], r2, r4
 ; CHECK-NEXT:    vmov.f32 s14, s13
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index f4643f8c6c4a1f5..5bac1633737a862 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -374,11 +374,11 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3step_i16(<32 x i16> %src) {
 ; CHECK-NEXT:    vins.f16 s6, s2
 ; CHECK-NEXT:    vins.f16 s9, s4
 ; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    vmov.f32 s2, s6
 ; CHECK-NEXT:    vins.f16 s14, s8
+; CHECK-NEXT:    vmov.f32 s3, s9
 ; CHECK-NEXT:    vins.f16 s15, s11
 ; CHECK-NEXT:    vins.f16 s13, s5
-; CHECK-NEXT:    vmov.f32 s2, s6
-; CHECK-NEXT:    vmov.f32 s3, s9
 ; CHECK-NEXT:    vadd.i16 q0, q0, q3
 ; CHECK-NEXT:    vadd.i16 q0, q0, q4
 ; CHECK-NEXT:    vpop {d8, d9}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
index 219541cffb940fa..6c7ac961817b036 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
@@ -93,9 +93,9 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo
 ; CHECK-NEXT:    vmovx.f16 s22, s15
 ; CHECK-NEXT:    vins.f16 s15, s27
 ; CHECK-NEXT:    vmovx.f16 s8, s27
-; CHECK-NEXT:    vins.f16 s12, s24
 ; CHECK-NEXT:    vins.f16 s13, s25
 ; CHECK-NEXT:    vins.f16 s3, s11
+; CHECK-NEXT:    vins.f16 s12, s24
 ; CHECK-NEXT:    vins.f16 s1, s9
 ; CHECK-NEXT:    vins.f16 s2, s10
 ; CHECK-NEXT:    vins.f16 s22, s8
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index d80dd5a673e20f2..05209b74cdfed54 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -491,6 +491,7 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmov.f32 s8, s6
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT:    vmov.f32 s18, s31
+; CHECK-NEXT:    vstrw.32 q4, [r1, #80]
 ; CHECK-NEXT:    vmov.u16 r0, q1[3]
 ; CHECK-NEXT:    vins.f16 s8, s6
 ; CHECK-NEXT:    vmov.16 q1[2], r0
@@ -507,7 +508,6 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vstrw.32 q6, [r1, #48]
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
-; CHECK-NEXT:    vstrw.32 q4, [r1, #80]
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #32]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
@@ -1039,17 +1039,17 @@ define void @vst3_v8f32(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmov.f32 s13, s25
 ; CHECK-NEXT:    vmov.f32 s9, s7
 ; CHECK-NEXT:    vstrw.32 q3, [r1, #64]
-; CHECK-NEXT:    vmov.f32 s21, s16
 ; CHECK-NEXT:    vmov.f32 s22, s28
 ; CHECK-NEXT:    vmov.f32 s8, s30
-; CHECK-NEXT:    vstrw.32 q5, [r1]
 ; CHECK-NEXT:    vmov.f32 s10, s19
 ; CHECK-NEXT:    vmov.f32 s11, s31
 ; CHECK-NEXT:    vmov.f32 s5, s29
 ; CHECK-NEXT:    vstrw.32 q2, [r1, #32]
 ; CHECK-NEXT:    vmov.f32 s4, s17
 ; CHECK-NEXT:    vmov.f32 s7, s18
+; CHECK-NEXT:    vmov.f32 s21, s16
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
+; CHECK-NEXT:    vstrw.32 q5, [r1]
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    bx lr
@@ -1135,13 +1135,13 @@ define void @vst3_v16f32(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmov.f32 s31, s9
 ; CHECK-NEXT:    vldrw.u32 q2, [sp, #80] @ 16-byte Reload
 ; CHECK-NEXT:    vmov.f32 s12, s29
-; CHECK-NEXT:    vmov.f32 s29, s4
 ; CHECK-NEXT:    vstrw.32 q3, [r1, #160]
+; CHECK-NEXT:    vmov.f32 s29, s4
 ; CHECK-NEXT:    vmov.f32 s16, s5
 ; CHECK-NEXT:    vstrw.32 q7, [r1, #96]
 ; CHECK-NEXT:    vmov.f32 s19, s6
-; CHECK-NEXT:    vmov.f32 s4, s8
 ; CHECK-NEXT:    vstrw.32 q4, [r1, #112]
+; CHECK-NEXT:    vmov.f32 s4, s8
 ; CHECK-NEXT:    vmov.f32 s6, s20
 ; CHECK-NEXT:    vmov.f32 s20, s22
 ; CHECK-NEXT:    vmov.f32 s5, s0
@@ -1441,10 +1441,8 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vldrw.u32 q5, [sp, #16] @ 16-byte Reload
 ; CHECK-NEXT:    vstrw.32 q0, [r1, #80]
 ; CHECK-NEXT:    vldrw.u32 q0, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT:    vmov.f32 s26, s15
-; CHECK-NEXT:    vins.f16 s29, s12
 ; CHECK-NEXT:    vmov.f32 s21, s8
-; CHECK-NEXT:    vstrw.32 q6, [r1, #32]
+; CHECK-NEXT:    vins.f16 s29, s12
 ; CHECK-NEXT:    vmov.f32 s4, s9
 ; CHECK-NEXT:    vstrw.32 q5, [r1, #48]
 ; CHECK-NEXT:    vmov.f32 s7, s10
@@ -1452,7 +1450,9 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmov.f32 s28, s13
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #64]
 ; CHECK-NEXT:    vmov.f32 s31, s14
+; CHECK-NEXT:    vmov.f32 s26, s15
 ; CHECK-NEXT:    vstrw.32 q7, [r1, #16]
+; CHECK-NEXT:    vstrw.32 q6, [r1, #32]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index f3a65c40031af3e..fb50a0bc52fff47 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -442,11 +442,11 @@ define void @vst4_v8i16_align1(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmovx.f16 s6, s14
 ; CHECK-NEXT:    vmovx.f16 s4, s18
 ; CHECK-NEXT:    vins.f16 s14, s18
-; CHECK-NEXT:    vins.f16 s2, s0
-; CHECK-NEXT:    vmov.f32 s0, s13
 ; CHECK-NEXT:    vmov.f32 s24, s15
 ; CHECK-NEXT:    vins.f16 s6, s4
 ; CHECK-NEXT:    vmov.f32 s4, s14
+; CHECK-NEXT:    vins.f16 s2, s0
+; CHECK-NEXT:    vmov.f32 s0, s13
 ; CHECK-NEXT:    vstrb.8 q6, [r1, #48]
 ; CHECK-NEXT:    vstrb.8 q1, [r1, #32]
 ; CHECK-NEXT:    vstrb.8 q0, [r1, #16]
@@ -1191,8 +1191,8 @@ define void @vst4_v8f16_align1(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmovx.f16 s23, s6
 ; CHECK-NEXT:    vmovx.f16 s1, s22
 ; CHECK-NEXT:    vins.f16 s6, s22
-; CHECK-NEXT:    vins.f16 s5, s21
 ; CHECK-NEXT:    vins.f16 s4, s20
+; CHECK-NEXT:    vins.f16 s5, s21
 ; CHECK-NEXT:    vins.f16 s23, s1
 ; CHECK-NEXT:    vmovx.f16 s22, s10
 ; CHECK-NEXT:    vins.f16 s10, s26
diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll
index 1f9e7a93ad0b903..2e2abd0063de69f 100644
--- a/llvm/test/CodeGen/X86/mul-constant-result.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-result.ll
@@ -524,18 +524,15 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 define i32 @foo() local_unnamed_addr #0 {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    .cfi_def_cfa_offset 12
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 20
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
-; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:    .cfi_offset %esi, -16
+; X86-NEXT:    .cfi_offset %edi, -12
+; X86-NEXT:    .cfi_offset %ebx, -8
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $1
@@ -612,8 +609,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $8, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $8, %esi
 ; X86-NEXT:    pushl $4
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $9
@@ -621,9 +618,9 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    xorl $9, %esi
-; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $9, %ebx
+; X86-NEXT:    orl %esi, %ebx
 ; X86-NEXT:    pushl $5
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $10
@@ -631,9 +628,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $10, %ebx
-; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $10, %esi
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edi, %esi
 ; X86-NEXT:    pushl $5
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $11
@@ -641,10 +639,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    xorl $11, %esi
-; X86-NEXT:    orl %ebx, %esi
-; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $11, %edi
 ; X86-NEXT:    pushl $6
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $12
@@ -654,6 +650,7 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    xorl $12, %ebx
+; X86-NEXT:    orl %edi, %ebx
 ; X86-NEXT:    pushl $6
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $13
@@ -664,6 +661,7 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    xorl $13, %edi
 ; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    pushl $7
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $14
@@ -671,9 +669,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $14, %ebx
-; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $14, %esi
 ; X86-NEXT:    pushl $7
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $15
@@ -681,9 +678,9 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebp
-; X86-NEXT:    xorl $15, %ebp
-; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $15, %ebx
+; X86-NEXT:    orl %esi, %ebx
 ; X86-NEXT:    pushl $8
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $16
@@ -691,10 +688,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    xorl $16, %edi
-; X86-NEXT:    orl %ebp, %edi
-; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $16, %esi
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edi, %esi
 ; X86-NEXT:    pushl $8
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $17
@@ -702,8 +699,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $17, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $17, %edi
 ; X86-NEXT:    pushl $9
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $18
@@ -711,9 +708,9 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    xorl $18, %esi
-; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $18, %ebx
+; X86-NEXT:    orl %edi, %ebx
 ; X86-NEXT:    pushl $9
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $19
@@ -721,9 +718,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $19, %ebx
-; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $19, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    pushl $10
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $20
@@ -733,7 +731,6 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    xorl $20, %esi
-; X86-NEXT:    orl %ebx, %esi
 ; X86-NEXT:    pushl $10
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $21
@@ -762,8 +759,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $23, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $23, %edi
 ; X86-NEXT:    pushl $12
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $24
@@ -771,9 +768,9 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    xorl $24, %edi
-; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $24, %ebx
+; X86-NEXT:    orl %edi, %ebx
 ; X86-NEXT:    pushl $12
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $25
@@ -781,9 +778,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    xorl $25, %ebx
-; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $25, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    pushl $13
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $26
@@ -791,9 +789,8 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    xorl $26, %edi
-; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $26, %esi
 ; X86-NEXT:    pushl $13
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $27
@@ -803,7 +800,7 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    xorl $27, %ebx
-; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    orl %esi, %ebx
 ; X86-NEXT:    pushl $14
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $28
@@ -811,9 +808,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %ebp
-; X86-NEXT:    xorl $28, %ebp
-; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $28, %esi
+; X86-NEXT:    orl %ebx, %esi
+; X86-NEXT:    orl %edi, %esi
 ; X86-NEXT:    pushl $14
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $29
@@ -823,8 +821,6 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    xorl $29, %edi
-; X86-NEXT:    orl %ebp, %edi
-; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    pushl $15
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $30
@@ -834,6 +830,7 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    xorl $30, %ebx
+; X86-NEXT:    orl %edi, %ebx
 ; X86-NEXT:    pushl $15
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $31
@@ -841,10 +838,10 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    calll mult@PLT
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    xorl $31, %esi
-; X86-NEXT:    orl %ebx, %esi
-; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $31, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    pushl $16
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $32
@@ -854,17 +851,15 @@ define i32 @foo() local_unnamed_addr #0 {
 ; X86-NEXT:    .cfi_adjust_cfa_offset -8
 ; X86-NEXT:    xorl $32, %eax
 ; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl %edi, %eax
 ; X86-NEXT:    setne %cl
 ; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 16
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    .cfi_def_cfa_offset 12
-; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %edi
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    popl %ebp
+; X86-NEXT:    popl %ebx
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll
index 3d3e935045475e5..80aa637e42ba3e0 100644
--- a/llvm/test/CodeGen/X86/sad.ll
+++ b/llvm/test/CodeGen/X86/sad.ll
@@ -383,6 +383,7 @@ define dso_local i32 @sad_avx64i8() nounwind {
 ; AVX1-NEXT:  # %bb.2: # %middle.block
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddd %xmm4, %xmm4, %xmm5
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm7
@@ -393,9 +394,9 @@ define dso_local i32 @sad_avx64i8() nounwind {
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpaddd %xmm5, %xmm4, %xmm1
-; AVX1-NEXT:    vpaddd %xmm1, %xmm6, %xmm2
-; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpaddd %xmm1, %xmm6, %xmm1
+; AVX1-NEXT:    vpaddd %xmm5, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 216332943993e37..481d11963302f9a 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1429,7 +1429,6 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
 ; AVX2-NEXT:    vpshufb %ymm10, %ymm3, %ymm3
 ; AVX2-NEXT:    vpshufb %ymm11, %ymm4, %ymm4
 ; AVX2-NEXT:    vpor %ymm4, %ymm3, %ymm3
-; AVX2-NEXT:    vpaddb %ymm3, %ymm8, %ymm3
 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm4 = ymm7[5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20]
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm6 = [8,11,14,0,0,0,2,5,8,11,14,0,0,0,2,5,8,11,14,0,0,0,2,5,8,11,14,0,0,0,2,5]
 ; AVX2-NEXT:    vpshufb %ymm6, %ymm0, %ymm0
@@ -1440,7 +1439,8 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm2 = ymm5[5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20]
 ; AVX2-NEXT:    vpshufb %ymm6, %ymm1, %ymm1
 ; AVX2-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpaddb %ymm1, %ymm3, %ymm1
+; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddb %ymm1, %ymm8, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_load_vf64_i8_stride3:



More information about the llvm-commits mailing list