[llvm] [llvm][ScheduleDAG] SUnit::biasCriticalPath() does not find the critical path consistently (PR #93001)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 22 02:33:38 PDT 2024


https://github.com/csstormq updated https://github.com/llvm/llvm-project/pull/93001

>From 3218e892302ced9563b2d03472aaf6f5f25e068d Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Thu, 16 May 2024 16:43:18 +0800
Subject: [PATCH 1/2] [llvm][ScheduleDAG] SUnit::biasCriticalPath() does not
 find the critical path consistently

Patch co-authored by AtariDreams (gfunni234 at gmail.com).

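The loop compared each data predecessor's depth against MaxDepth but never
updated MaxDepth after selecting a deeper predecessor, so it ended up picking
the last predecessor deeper than the starting candidate rather than the
deepest one. Update MaxDepth whenever BestI changes.

Fixes #38037.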

[AMDGPU] Update test results to fix build (#92982)
---
 llvm/lib/CodeGen/ScheduleDAG.cpp              |    4 +-
 llvm/test/CodeGen/AMDGPU/fp_to_sint.ll        |  395 ++--
 llvm/test/CodeGen/AMDGPU/fp_to_uint.ll        |  395 ++--
 llvm/test/CodeGen/AMDGPU/llvm.exp.ll          | 1592 ++++++++---------
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll        | 1592 ++++++++---------
 llvm/test/CodeGen/AMDGPU/shl.ll               |  216 +--
 .../test/CodeGen/X86/misched-critical-path.ll |  240 +++
 7 files changed, 2320 insertions(+), 2114 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/misched-critical-path.ll

diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index de8e6f63794dc..8d9a5041fc2fe 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -331,8 +331,10 @@ void SUnit::biasCriticalPath() {
   unsigned MaxDepth = BestI->getSUnit()->getDepth();
   for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
        ++I) {
-    if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+    if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
+      MaxDepth = I->getSUnit()->getDepth();
       BestI = I;
+    }
   }
   if (BestI != Preds.begin())
     std::swap(*Preds.begin(), *BestI);
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
index 64063f65e288f..04ef30bd26aa5 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -253,25 +253,25 @@ define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) {
 ; EG-NEXT:     ADD_INT * T2.W, PV.W, literal.y,
 ; EG-NEXT:    8388608(1.175494e-38), -150(nan)
 ; EG-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; EG-NEXT:     SUB_INT T0.Y, literal.y, T0.W,
-; EG-NEXT:     AND_INT T0.Z, PS, literal.z,
+; EG-NEXT:     AND_INT T0.Y, PS, literal.y,
+; EG-NEXT:     SUB_INT T0.Z, literal.z, T0.W,
 ; EG-NEXT:     NOT_INT T0.W, PS,
 ; EG-NEXT:     LSHR * T3.W, PV.W, 1,
-; EG-NEXT:    -127(nan), 150(2.101948e-43)
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:    -127(nan), 31(4.344025e-44)
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
 ; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T1.Y, T1.W, PV.Z,
-; EG-NEXT:     AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT T1.Y, PV.Z, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
+; EG-NEXT:     LSHL T0.W, T1.W, PV.Y,
+; EG-NEXT:     AND_INT * T1.W, T2.W, literal.x,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
 ; EG-NEXT:     CNDE_INT T0.Y, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT:     CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
+; EG-NEXT:     CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT:     CNDE_INT T0.W, PS, PV.X, PV.W,
 ; EG-NEXT:     SETGT_INT * T1.W, T0.X, literal.x,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T0.Z, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T0.W, PS, PV.Y, PV.Z,
+; EG-NEXT:     CNDE_INT T1.Z, PS, 0.0, PV.W,
+; EG-NEXT:     CNDE_INT T0.W, PS, PV.Z, PV.Y,
 ; EG-NEXT:     ASHR * T1.W, KC0[2].Z, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
@@ -364,79 +364,78 @@ define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %
 ;
 ; EG-LABEL: fp_to_sint_v2i64:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 75, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 74, @4, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
-; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT:     BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T2.W, PV.W, literal.z,
-; EG-NEXT:    8388607(1.175494e-38), 23(3.222986e-44)
+; EG-NEXT:     BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T1.Z, KC0[2].W, literal.y,
+; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     ADD_INT T1.W, PV.W, literal.x,
+; EG-NEXT:     ADD_INT * T2.W, PV.Z, literal.x,
 ; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T0.X, literal.x, PV.W,
-; EG-NEXT:     SUB_INT T0.Y, literal.x, T1.W,
-; EG-NEXT:     AND_INT T1.Z, PS, literal.y,
-; EG-NEXT:     OR_INT T3.W, PV.Z, literal.z,
+; EG-NEXT:     AND_INT T0.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Y, PV.W, literal.x,
+; EG-NEXT:     OR_INT T1.Z, T1.Z, literal.y,
+; EG-NEXT:     SUB_INT T3.W, literal.z, T0.W,
 ; EG-NEXT:     AND_INT * T4.W, KC0[3].X, literal.w,
-; EG-NEXT:    150(2.101948e-43), 31(4.344025e-44)
-; EG-NEXT:    8388608(1.175494e-38), 8388607(1.175494e-38)
+; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
+; EG-NEXT:    150(2.101948e-43), 8388607(1.175494e-38)
 ; EG-NEXT:     OR_INT T1.X, PS, literal.x,
-; EG-NEXT:     LSHL T1.Y, PV.W, PV.Z,
-; EG-NEXT:     AND_INT T0.Z, T2.W, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
-; EG-NEXT:     AND_INT * T5.W, PV.Y, literal.y,
+; EG-NEXT:     AND_INT T1.Y, PV.W, literal.y,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
+; EG-NEXT:     LSHL T3.W, PV.Z, PV.Y,
+; EG-NEXT:     AND_INT * T4.W, T1.W, literal.y,
 ; EG-NEXT:    8388608(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT:     CNDE_INT T2.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
-; EG-NEXT:     ADD_INT T1.Z, T0.W, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
-; EG-NEXT:     AND_INT * T5.W, T0.X, literal.y,
-; EG-NEXT:    -150(nan), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T0.Y, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT:     LSHL T5.W, PV.X, T0.X,
+; EG-NEXT:     AND_INT * T6.W, T2.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
 ; EG-NEXT:     CNDE_INT T0.X, PS, PV.W, 0.0,
-; EG-NEXT:     NOT_INT T2.Y, T2.W,
-; EG-NEXT:     AND_INT T2.Z, PV.Z, literal.x,
-; EG-NEXT:     NOT_INT T2.W, PV.Z,
-; EG-NEXT:     LSHR * T4.W, T1.X, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T3.X, T3.W, 1,
-; EG-NEXT:     ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T0.W, T1.X, PV.Z,
-; EG-NEXT:     AND_INT * T2.W, T1.Z, literal.y,
+; EG-NEXT:     NOT_INT T1.Y, T1.W,
+; EG-NEXT:     SUB_INT T3.Z, literal.x, T0.Z,
+; EG-NEXT:     NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
+; EG-NEXT:     LSHR * T2.W, T1.X, 1,
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT:     LSHR T2.X, T1.Z, 1,
+; EG-NEXT:     ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
+; EG-NEXT:     AND_INT * T2.W, PV.Z, literal.y,
 ; EG-NEXT:    -127(nan), 32(4.484155e-44)
 ; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T4.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
-; EG-NEXT:     ADD_INT * T1.W, T1.W, literal.y,
+; EG-NEXT:     CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
+; EG-NEXT:     SETGT_INT T0.Z, PV.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
+; EG-NEXT:     ADD_INT * T0.W, T0.W, literal.y,
 ; EG-NEXT:    23(3.222986e-44), -127(nan)
-; EG-NEXT:     CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
+; EG-NEXT:     CNDE_INT T2.X, T4.W, PV.W, T3.W,
 ; EG-NEXT:     SETGT_INT T1.Y, PS, literal.x,
-; EG-NEXT:     CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT:     CNDE_INT T0.W, PV.Z, T0.X, PV.X,
+; EG-NEXT:     CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.X, T0.X,
 ; EG-NEXT:     ASHR * T2.W, KC0[3].X, literal.y,
 ; EG-NEXT:    23(3.222986e-44), 31(4.344025e-44)
 ; EG-NEXT:     XOR_INT T0.X, PV.W, PS,
-; EG-NEXT:     XOR_INT T2.Y, PV.Z, PS,
+; EG-NEXT:     XOR_INT T3.Y, PV.Z, PS,
 ; EG-NEXT:     CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
-; EG-NEXT:     CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
 ; EG-NEXT:     ASHR * T3.W, KC0[2].W, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ; EG-NEXT:     XOR_INT T0.Y, PV.W, PS,
 ; EG-NEXT:     XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT:     SUB_INT T0.W, PV.Y, T2.W,
+; EG-NEXT:     SUB_INT T1.W, PV.Y, T2.W,
 ; EG-NEXT:     SUBB_UINT * T4.W, PV.X, T2.W,
 ; EG-NEXT:     SUB_INT T1.Y, PV.W, PS,
-; EG-NEXT:     SETGT_INT T1.Z, 0.0, T3.Y,
-; EG-NEXT:     SUB_INT T0.W, PV.Z, T3.W,
+; EG-NEXT:     SETGT_INT T1.Z, 0.0, T2.Y,
+; EG-NEXT:     SUB_INT T1.W, PV.Z, T3.W,
 ; EG-NEXT:     SUBB_UINT * T4.W, PV.Y, T3.W,
 ; EG-NEXT:     SUB_INT T0.Z, PV.W, PS,
-; EG-NEXT:     SETGT_INT T0.W, 0.0, T1.W,
+; EG-NEXT:     SETGT_INT T0.W, 0.0, T0.W,
 ; EG-NEXT:     CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
 ; EG-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
 ; EG-NEXT:     SUB_INT * T2.W, T0.X, T2.W,
@@ -567,170 +566,168 @@ define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %
 ;
 ; EG-LABEL: fp_to_sint_v4i64:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 101, @6, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    ALU 54, @108, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
+; EG-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 54, @106, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 6:
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
-; EG-NEXT:     AND_INT * T2.W, KC0[4].X, literal.y,
+; EG-NEXT:     BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, KC0[3].Z, literal.y,
 ; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
-; EG-NEXT:     OR_INT T0.Z, PS, literal.x,
-; EG-NEXT:     BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T3.W, PV.W, literal.z,
-; EG-NEXT:    8388608(1.175494e-38), 23(3.222986e-44)
-; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:     ADD_INT T0.Y, PV.W, literal.x,
-; EG-NEXT:     AND_INT T1.Z, PS, literal.y,
-; EG-NEXT:     NOT_INT T4.W, PS,
-; EG-NEXT:     LSHR * T5.W, PV.Z, 1,
-; EG-NEXT:    -127(nan), 31(4.344025e-44)
+; EG-NEXT:     OR_INT T2.W, PS, literal.x,
+; EG-NEXT:     ADD_INT * T3.W, PV.W, literal.y,
+; EG-NEXT:    8388608(1.175494e-38), -150(nan)
 ; EG-NEXT:     ADD_INT T0.X, T1.W, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
-; EG-NEXT:     AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
-; EG-NEXT:     LSHL T3.W, T0.Z, PV.Z,
-; EG-NEXT:     SUB_INT * T1.W, literal.z, T1.W,
-; EG-NEXT:    -127(nan), 32(4.484155e-44)
-; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T1.X, PS, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
-; EG-NEXT:     AND_INT T0.Z, KC0[3].Z, literal.y,
-; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
-; EG-NEXT:     SETGT_INT * T4.W, PV.X, literal.z,
+; EG-NEXT:     BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
+; EG-NEXT:     AND_INT T0.Z, PS, literal.z,
+; EG-NEXT:     NOT_INT T4.W, PS,
+; EG-NEXT:     LSHR * T5.W, PV.W, 1,
+; EG-NEXT:    -127(nan), 23(3.222986e-44)
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
+; EG-NEXT:     AND_INT T1.Y, T3.W, literal.x,
+; EG-NEXT:     LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
+; EG-NEXT:     AND_INT T3.W, KC0[4].X, literal.y,
+; EG-NEXT:     ADD_INT * T4.W, PV.Y, literal.z,
 ; EG-NEXT:    32(4.484155e-44), 8388607(1.175494e-38)
+; EG-NEXT:    -150(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.Y, PS, literal.x,
+; EG-NEXT:     OR_INT T1.Z, PV.W, literal.y,
+; EG-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
+; EG-NEXT:     SETGT_INT * T5.W, T0.X, literal.z,
+; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T2.X, PS, 0.0, PV.W,
-; EG-NEXT:     OR_INT T1.Y, PV.Z, literal.x,
-; EG-NEXT:     ADD_INT T0.Z, T2.W, literal.y,
-; EG-NEXT:     CNDE_INT T1.W, PV.X, PV.Y, 0.0,
-; EG-NEXT:     CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
-; EG-NEXT:    8388608(1.175494e-38), -150(nan)
-; EG-NEXT:     CNDE_INT T1.X, T4.W, PV.W, PS,
-; EG-NEXT:     ASHR T2.Y, KC0[4].X, literal.x,
-; EG-NEXT:     AND_INT T1.Z, PV.Z, literal.x,
-; EG-NEXT:     NOT_INT T1.W, PV.Z,
-; EG-NEXT:     LSHR * T3.W, PV.Y, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T3.Y, T1.Y, PV.Z,
-; EG-NEXT:     XOR_INT T1.Z, PV.X, PV.Y,
-; EG-NEXT:     XOR_INT T1.W, T2.X, PV.Y,
-; EG-NEXT:     SUB_INT * T2.W, literal.x, T2.W,
-; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T1.X, T0.Z, literal.x,
-; EG-NEXT:     AND_INT T4.Y, PS, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
-; EG-NEXT:     SUB_INT T1.W, PV.W, T2.Y,
-; EG-NEXT:     SUBB_UINT * T2.W, PV.Z, T2.Y,
-; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T2.X, PV.W, PS,
-; EG-NEXT:     CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
-; EG-NEXT:     CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
-; EG-NEXT:     CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     SETGT_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT:     CNDE_INT T3.Y, PS, 0.0, PV.W,
+; EG-NEXT:     SUB_INT T2.Z, literal.x, T1.W,
+; EG-NEXT:     LSHL T1.W, PV.Z, PV.Y,
+; EG-NEXT:     AND_INT * T3.W, T4.W, literal.y,
+; EG-NEXT:    150(2.101948e-43), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
+; EG-NEXT:     AND_INT T2.Y, PV.Z, literal.x,
+; EG-NEXT:     SUB_INT T3.Z, literal.y, T0.Y,
+; EG-NEXT:     NOT_INT T4.W, T4.W,
+; EG-NEXT:     LSHR * T6.W, T1.Z, 1,
+; EG-NEXT:    32(4.484155e-44), 150(2.101948e-43)
+; EG-NEXT:     BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
+; EG-NEXT:     ADD_INT T0.Y, T0.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
+; EG-NEXT:     AND_INT * T4.W, PV.Z, literal.y,
+; EG-NEXT:    -127(nan), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
+; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT:     CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
+; EG-NEXT:     CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
-; EG-NEXT:     AND_INT T3.Y, KC0[3].W, literal.y,
-; EG-NEXT:     CNDE_INT T2.Z, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T1.W, PS, PV.Y, PV.Z,
-; EG-NEXT:     ASHR * T2.W, KC0[3].Z, literal.z,
-; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     CNDE_INT T2.X, T5.W, PS, PV.W,
+; EG-NEXT:     ASHR T1.Y, KC0[3].Z, literal.x,
+; EG-NEXT:     CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.X, T1.X,
+; EG-NEXT:     ASHR * T2.W, KC0[4].X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
-; EG-NEXT:     XOR_INT T1.Y, PV.W, PS,
+; EG-NEXT:     XOR_INT T2.Y, PV.W, PS,
 ; EG-NEXT:     XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT:     OR_INT T0.W, PV.Y, literal.y,
-; EG-NEXT:     SUB_INT * T1.W, literal.z, PV.X,
-; EG-NEXT:    23(3.222986e-44), 8388608(1.175494e-38)
+; EG-NEXT:     XOR_INT T1.W, PV.X, PV.Y,
+; EG-NEXT:     XOR_INT * T3.W, T3.Y, PV.Y,
+; EG-NEXT:     SUB_INT T3.Y, PS, T1.Y,
+; EG-NEXT:     SUBB_UINT T1.Z, PV.W, T1.Y,
+; EG-NEXT:     SUB_INT T3.W, PV.Z, T2.W,
+; EG-NEXT:     SUBB_UINT * T4.W, PV.Y, T2.W,
+; EG-NEXT:     SUB_INT T4.Y, PV.W, PS,
+; EG-NEXT:     SUB_INT T0.Z, PV.Y, PV.Z,
+; EG-NEXT:     BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
+; EG-NEXT:     AND_INT * T4.W, KC0[3].Y, literal.y,
+; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT:     ADD_INT T3.Y, PV.W, literal.x,
+; EG-NEXT:     OR_INT T1.Z, PS, literal.y,
+; EG-NEXT:     BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
+; EG-NEXT:     ADD_INT * T4.W, PV.W, literal.w,
+; EG-NEXT:    -127(nan), 8388608(1.175494e-38)
+; EG-NEXT:    23(3.222986e-44), -150(nan)
+; EG-NEXT:     AND_INT T1.X, KC0[3].W, literal.x,
+; EG-NEXT:     ADD_INT T5.Y, PV.W, literal.y,
+; EG-NEXT:     SUB_INT T2.Z, literal.z, T3.W,
+; EG-NEXT:     NOT_INT T3.W, PS,
+; EG-NEXT:     LSHR * T5.W, PV.Z, 1,
+; EG-NEXT:    8388607(1.175494e-38), -150(nan)
 ; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T4.X, KC0[3].Y, literal.x,
-; EG-NEXT:     AND_INT T3.Y, PS, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
-; EG-NEXT:     SUB_INT T1.W, PV.Z, T2.W,
-; EG-NEXT:     SUBB_UINT * T3.W, PV.Y, T2.W,
-; EG-NEXT:    8388607(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT:     SUB_INT T5.X, PV.W, PS,
-; EG-NEXT:     SETGT_INT T0.Y, 0.0, T0.Y,
-; EG-NEXT:     CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
-; EG-NEXT:     OR_INT T1.W, PV.X, literal.x,
-; EG-NEXT:     ADD_INT * T3.W, T3.X, literal.y,
-; EG-NEXT:    8388608(1.175494e-38), -150(nan)
-; EG-NEXT:     ADD_INT T4.X, T3.X, literal.x,
-; EG-NEXT:     SUB_INT T3.Y, literal.y, T3.X,
-; EG-NEXT:     AND_INT T2.Z, PS, literal.z,
-; EG-NEXT:     NOT_INT T4.W, PS,
-; EG-NEXT:     LSHR * T5.W, PV.W, 1,
-; EG-NEXT:    -127(nan), 150(2.101948e-43)
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T4.Y, T1.W, PV.Z,
-; EG-NEXT:     AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     AND_INT * T3.W, PV.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
+; EG-NEXT:     AND_INT T6.Y, PV.Z, literal.x,
+; EG-NEXT:     AND_INT T3.Z, PV.Y, literal.y,
+; EG-NEXT:     OR_INT T3.W, PV.X, literal.z,
+; EG-NEXT:     AND_INT * T5.W, T4.W, literal.y,
+; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
+; EG-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
+; EG-NEXT:     LSHL T7.Y, T1.Z, PS,
+; EG-NEXT:     AND_INT T1.Z, T4.W, literal.x,
+; EG-NEXT:     LSHL T4.W, PV.W, PV.Z,
+; EG-NEXT:     AND_INT * T5.W, T5.Y, literal.x,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     ADD_INT T6.X, T1.X, literal.x,
-; EG-NEXT:     CNDE_INT T3.Y, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT * T3.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:    ALU clause starting at 108:
-; EG-NEXT:     CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
-; EG-NEXT:     SETGT_INT * T3.W, T4.X, literal.x,
+; EG-NEXT:     CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
+; EG-NEXT:     CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
+; EG-NEXT:    ALU clause starting at 106:
+; EG-NEXT:     CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     SETGT_INT * T7.W, T3.Y, literal.x,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T3.X, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T3.Y, PS, T3.Y, T3.Z,
-; EG-NEXT:     AND_INT T2.Z, T6.X, literal.x,
-; EG-NEXT:     NOT_INT T1.W, T6.X,
-; EG-NEXT:     LSHR * T3.W, T0.W, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     ASHR T7.X, KC0[3].Y, literal.x,
-; EG-NEXT:     ADD_INT T4.Y, T1.X, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T0.W, T0.W, PV.Z,
-; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT:     CNDE_INT T1.X, PS, 0.0, PV.W,
+; EG-NEXT:     CNDE_INT T6.Y, PS, T2.Z, T8.Y,
+; EG-NEXT:     SUB_INT T1.Z, literal.x, T0.W,
+; EG-NEXT:     NOT_INT T6.W, T5.Y,
+; EG-NEXT:     LSHR * T7.W, T3.W, 1,
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT:     ASHR T2.X, KC0[3].Y, literal.x,
+; EG-NEXT:     ADD_INT T5.Y, T0.W, literal.y,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
+; EG-NEXT:     AND_INT * T3.W, PV.Z, literal.z,
 ; EG-NEXT:    31(4.344025e-44), -127(nan)
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T5.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     SETGT_INT T2.Z, PV.Y, literal.x,
-; EG-NEXT:     XOR_INT T0.W, T3.Y, PV.X,
-; EG-NEXT:     XOR_INT * T1.W, T3.X, PV.X,
+; EG-NEXT:     CNDE_INT T4.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
+; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT:     XOR_INT T0.W, T6.Y, PV.X,
+; EG-NEXT:     XOR_INT * T3.W, T1.X, PV.X,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T3.X, PS, T7.X,
-; EG-NEXT:     SUBB_UINT T3.Y, PV.W, T7.X,
-; EG-NEXT:     CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT:     CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
-; EG-NEXT:     ASHR * T3.W, KC0[3].W, literal.x,
+; EG-NEXT:     SUB_INT T1.X, PS, T2.X,
+; EG-NEXT:     SUBB_UINT T6.Y, PV.W, T2.X,
+; EG-NEXT:     CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T3.W, PV.Z, PV.X, T3.X,
+; EG-NEXT:     ASHR * T4.W, KC0[3].W, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     XOR_INT T1.X, PV.W, PS,
-; EG-NEXT:     XOR_INT T5.Y, PV.Z, PS,
-; EG-NEXT:     SUB_INT T0.Z, PV.X, PV.Y,
-; EG-NEXT:     SETGT_INT T1.W, 0.0, T4.X, BS:VEC_021/SCL_122
-; EG-NEXT:     CNDE_INT * T6.W, T0.Y, T5.X, 0.0,
-; EG-NEXT:     SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT:     XOR_INT T3.X, PV.W, PS,
+; EG-NEXT:     XOR_INT T7.Y, PV.Z, PS,
+; EG-NEXT:     SUB_INT T1.Z, PV.X, PV.Y,
+; EG-NEXT:     SETGT_INT T3.W, 0.0, T3.Y,
+; EG-NEXT:     CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
+; EG-NEXT:     SETGT_INT T1.X, 0.0, T0.Y,
 ; EG-NEXT:     CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
-; EG-NEXT:     SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
-; EG-NEXT:     SUB_INT T2.W, PV.Y, T3.W,
-; EG-NEXT:     SUBB_UINT * T4.W, PV.X, T3.W,
-; EG-NEXT:     SUB_INT T3.X, PV.W, PS,
-; EG-NEXT:     SETGT_INT T1.Y, 0.0, T4.Y,
-; EG-NEXT:     CNDE_INT T6.Z, T0.Y, PV.Z, 0.0,
-; EG-NEXT:     SUB_INT T0.W, T0.W, T7.X, BS:VEC_021/SCL_122
-; EG-NEXT:     CNDE_INT * T4.W, PV.X, T2.X, 0.0,
-; EG-NEXT:     CNDE_INT T6.X, T1.W, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T4.Y, PV.Y, PV.X, 0.0,
-; EG-NEXT:     SUB_INT T0.W, T1.Z, T2.Y,
-; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT:     SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     SUB_INT T1.W, PV.Y, T4.W,
+; EG-NEXT:     SUBB_UINT * T5.W, PV.X, T4.W,
+; EG-NEXT:     SUB_INT T4.X, PV.W, PS,
+; EG-NEXT:     SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
+; EG-NEXT:     SUB_INT T0.W, T0.W, T2.X,
+; EG-NEXT:     CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
+; EG-NEXT:     CNDE_INT T6.X, T3.W, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
+; EG-NEXT:     SUB_INT T0.W, T2.Y, T2.W,
+; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T4.Z, T0.X, PV.W, 0.0,
-; EG-NEXT:     SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
-; EG-NEXT:     CNDE_INT T4.X, T1.Y, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T1.Z, T1.X, PV.W, 0.0,
+; EG-NEXT:     SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
+; EG-NEXT:     CNDE_INT T1.X, T0.Y, PV.W, 0.0,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %conv = fptosi <4 x float> %x to <4 x i64>
   store <4 x i64> %conv, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
index 5170f9c76db23..5abf82aa1aab5 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
@@ -200,25 +200,25 @@ define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x
 ; EG-NEXT:     ADD_INT * T2.W, PV.W, literal.y,
 ; EG-NEXT:    8388608(1.175494e-38), -150(nan)
 ; EG-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; EG-NEXT:     SUB_INT T0.Y, literal.y, T0.W,
-; EG-NEXT:     AND_INT T0.Z, PS, literal.z,
+; EG-NEXT:     AND_INT T0.Y, PS, literal.y,
+; EG-NEXT:     SUB_INT T0.Z, literal.z, T0.W,
 ; EG-NEXT:     NOT_INT T0.W, PS,
 ; EG-NEXT:     LSHR * T3.W, PV.W, 1,
-; EG-NEXT:    -127(nan), 150(2.101948e-43)
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:    -127(nan), 31(4.344025e-44)
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
 ; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T1.Y, T1.W, PV.Z,
-; EG-NEXT:     AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT T1.Y, PV.Z, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
+; EG-NEXT:     LSHL T0.W, T1.W, PV.Y,
+; EG-NEXT:     AND_INT * T1.W, T2.W, literal.x,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
 ; EG-NEXT:     CNDE_INT T0.Y, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT:     CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
+; EG-NEXT:     CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT:     CNDE_INT T0.W, PS, PV.X, PV.W,
 ; EG-NEXT:     SETGT_INT * T1.W, T0.X, literal.x,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T0.Z, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T0.W, PS, PV.Y, PV.Z,
+; EG-NEXT:     CNDE_INT T1.Z, PS, 0.0, PV.W,
+; EG-NEXT:     CNDE_INT T0.W, PS, PV.Z, PV.Y,
 ; EG-NEXT:     ASHR * T1.W, KC0[2].Z, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ; EG-NEXT:     XOR_INT T0.W, PV.W, PS,
@@ -288,79 +288,78 @@ define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x
 ;
 ; EG-LABEL: fp_to_uint_v2f32_to_v2i64:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 75, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 74, @4, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
-; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT:     BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T2.W, PV.W, literal.z,
-; EG-NEXT:    8388607(1.175494e-38), 23(3.222986e-44)
+; EG-NEXT:     BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T1.Z, KC0[2].W, literal.y,
+; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     ADD_INT T1.W, PV.W, literal.x,
+; EG-NEXT:     ADD_INT * T2.W, PV.Z, literal.x,
 ; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T0.X, literal.x, PV.W,
-; EG-NEXT:     SUB_INT T0.Y, literal.x, T1.W,
-; EG-NEXT:     AND_INT T1.Z, PS, literal.y,
-; EG-NEXT:     OR_INT T3.W, PV.Z, literal.z,
+; EG-NEXT:     AND_INT T0.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Y, PV.W, literal.x,
+; EG-NEXT:     OR_INT T1.Z, T1.Z, literal.y,
+; EG-NEXT:     SUB_INT T3.W, literal.z, T0.W,
 ; EG-NEXT:     AND_INT * T4.W, KC0[3].X, literal.w,
-; EG-NEXT:    150(2.101948e-43), 31(4.344025e-44)
-; EG-NEXT:    8388608(1.175494e-38), 8388607(1.175494e-38)
+; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
+; EG-NEXT:    150(2.101948e-43), 8388607(1.175494e-38)
 ; EG-NEXT:     OR_INT T1.X, PS, literal.x,
-; EG-NEXT:     LSHL T1.Y, PV.W, PV.Z,
-; EG-NEXT:     AND_INT T0.Z, T2.W, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
-; EG-NEXT:     AND_INT * T5.W, PV.Y, literal.y,
+; EG-NEXT:     AND_INT T1.Y, PV.W, literal.y,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
+; EG-NEXT:     LSHL T3.W, PV.Z, PV.Y,
+; EG-NEXT:     AND_INT * T4.W, T1.W, literal.y,
 ; EG-NEXT:    8388608(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT:     CNDE_INT T2.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
-; EG-NEXT:     ADD_INT T1.Z, T0.W, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
-; EG-NEXT:     AND_INT * T5.W, T0.X, literal.y,
-; EG-NEXT:    -150(nan), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T0.Y, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
+; EG-NEXT:     LSHL T5.W, PV.X, T0.X,
+; EG-NEXT:     AND_INT * T6.W, T2.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
 ; EG-NEXT:     CNDE_INT T0.X, PS, PV.W, 0.0,
-; EG-NEXT:     NOT_INT T2.Y, T2.W,
-; EG-NEXT:     AND_INT T2.Z, PV.Z, literal.x,
-; EG-NEXT:     NOT_INT T2.W, PV.Z,
-; EG-NEXT:     LSHR * T4.W, T1.X, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T3.X, T3.W, 1,
-; EG-NEXT:     ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T0.W, T1.X, PV.Z,
-; EG-NEXT:     AND_INT * T2.W, T1.Z, literal.y,
+; EG-NEXT:     NOT_INT T1.Y, T1.W,
+; EG-NEXT:     SUB_INT T3.Z, literal.x, T0.Z,
+; EG-NEXT:     NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
+; EG-NEXT:     LSHR * T2.W, T1.X, 1,
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT:     LSHR T2.X, T1.Z, 1,
+; EG-NEXT:     ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
+; EG-NEXT:     AND_INT * T2.W, PV.Z, literal.y,
 ; EG-NEXT:    -127(nan), 32(4.484155e-44)
 ; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T4.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
-; EG-NEXT:     ADD_INT * T1.W, T1.W, literal.y,
+; EG-NEXT:     CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
+; EG-NEXT:     SETGT_INT T0.Z, PV.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
+; EG-NEXT:     ADD_INT * T0.W, T0.W, literal.y,
 ; EG-NEXT:    23(3.222986e-44), -127(nan)
-; EG-NEXT:     CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
+; EG-NEXT:     CNDE_INT T2.X, T4.W, PV.W, T3.W,
 ; EG-NEXT:     SETGT_INT T1.Y, PS, literal.x,
-; EG-NEXT:     CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT:     CNDE_INT T0.W, PV.Z, T0.X, PV.X,
+; EG-NEXT:     CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.X, T0.X,
 ; EG-NEXT:     ASHR * T2.W, KC0[3].X, literal.y,
 ; EG-NEXT:    23(3.222986e-44), 31(4.344025e-44)
 ; EG-NEXT:     XOR_INT T0.X, PV.W, PS,
-; EG-NEXT:     XOR_INT T2.Y, PV.Z, PS,
+; EG-NEXT:     XOR_INT T3.Y, PV.Z, PS,
 ; EG-NEXT:     CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
-; EG-NEXT:     CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
 ; EG-NEXT:     ASHR * T3.W, KC0[2].W, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ; EG-NEXT:     XOR_INT T0.Y, PV.W, PS,
 ; EG-NEXT:     XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT:     SUB_INT T0.W, PV.Y, T2.W,
+; EG-NEXT:     SUB_INT T1.W, PV.Y, T2.W,
 ; EG-NEXT:     SUBB_UINT * T4.W, PV.X, T2.W,
 ; EG-NEXT:     SUB_INT T1.Y, PV.W, PS,
-; EG-NEXT:     SETGT_INT T1.Z, 0.0, T3.Y,
-; EG-NEXT:     SUB_INT T0.W, PV.Z, T3.W,
+; EG-NEXT:     SETGT_INT T1.Z, 0.0, T2.Y,
+; EG-NEXT:     SUB_INT T1.W, PV.Z, T3.W,
 ; EG-NEXT:     SUBB_UINT * T4.W, PV.Y, T3.W,
 ; EG-NEXT:     SUB_INT T0.Z, PV.W, PS,
-; EG-NEXT:     SETGT_INT T0.W, 0.0, T1.W,
+; EG-NEXT:     SETGT_INT T0.W, 0.0, T0.W,
 ; EG-NEXT:     CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
 ; EG-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
 ; EG-NEXT:     SUB_INT * T2.W, T0.X, T2.W,
@@ -449,170 +448,168 @@ define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x
 ;
 ; EG-LABEL: fp_to_uint_v4f32_to_v4i64:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 101, @6, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    ALU 54, @108, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
+; EG-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 54, @106, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 6:
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
-; EG-NEXT:     AND_INT * T2.W, KC0[4].X, literal.y,
+; EG-NEXT:     BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, KC0[3].Z, literal.y,
 ; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
-; EG-NEXT:     OR_INT T0.Z, PS, literal.x,
-; EG-NEXT:     BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T3.W, PV.W, literal.z,
-; EG-NEXT:    8388608(1.175494e-38), 23(3.222986e-44)
-; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:     ADD_INT T0.Y, PV.W, literal.x,
-; EG-NEXT:     AND_INT T1.Z, PS, literal.y,
-; EG-NEXT:     NOT_INT T4.W, PS,
-; EG-NEXT:     LSHR * T5.W, PV.Z, 1,
-; EG-NEXT:    -127(nan), 31(4.344025e-44)
+; EG-NEXT:     OR_INT T2.W, PS, literal.x,
+; EG-NEXT:     ADD_INT * T3.W, PV.W, literal.y,
+; EG-NEXT:    8388608(1.175494e-38), -150(nan)
 ; EG-NEXT:     ADD_INT T0.X, T1.W, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
-; EG-NEXT:     AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
-; EG-NEXT:     LSHL T3.W, T0.Z, PV.Z,
-; EG-NEXT:     SUB_INT * T1.W, literal.z, T1.W,
-; EG-NEXT:    -127(nan), 32(4.484155e-44)
-; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T1.X, PS, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
-; EG-NEXT:     AND_INT T0.Z, KC0[3].Z, literal.y,
-; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
-; EG-NEXT:     SETGT_INT * T4.W, PV.X, literal.z,
+; EG-NEXT:     BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
+; EG-NEXT:     AND_INT T0.Z, PS, literal.z,
+; EG-NEXT:     NOT_INT T4.W, PS,
+; EG-NEXT:     LSHR * T5.W, PV.W, 1,
+; EG-NEXT:    -127(nan), 23(3.222986e-44)
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
+; EG-NEXT:     AND_INT T1.Y, T3.W, literal.x,
+; EG-NEXT:     LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
+; EG-NEXT:     AND_INT T3.W, KC0[4].X, literal.y,
+; EG-NEXT:     ADD_INT * T4.W, PV.Y, literal.z,
 ; EG-NEXT:    32(4.484155e-44), 8388607(1.175494e-38)
+; EG-NEXT:    -150(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.Y, PS, literal.x,
+; EG-NEXT:     OR_INT T1.Z, PV.W, literal.y,
+; EG-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
+; EG-NEXT:     SETGT_INT * T5.W, T0.X, literal.z,
+; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T2.X, PS, 0.0, PV.W,
-; EG-NEXT:     OR_INT T1.Y, PV.Z, literal.x,
-; EG-NEXT:     ADD_INT T0.Z, T2.W, literal.y,
-; EG-NEXT:     CNDE_INT T1.W, PV.X, PV.Y, 0.0,
-; EG-NEXT:     CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
-; EG-NEXT:    8388608(1.175494e-38), -150(nan)
-; EG-NEXT:     CNDE_INT T1.X, T4.W, PV.W, PS,
-; EG-NEXT:     ASHR T2.Y, KC0[4].X, literal.x,
-; EG-NEXT:     AND_INT T1.Z, PV.Z, literal.x,
-; EG-NEXT:     NOT_INT T1.W, PV.Z,
-; EG-NEXT:     LSHR * T3.W, PV.Y, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T3.Y, T1.Y, PV.Z,
-; EG-NEXT:     XOR_INT T1.Z, PV.X, PV.Y,
-; EG-NEXT:     XOR_INT T1.W, T2.X, PV.Y,
-; EG-NEXT:     SUB_INT * T2.W, literal.x, T2.W,
-; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T1.X, T0.Z, literal.x,
-; EG-NEXT:     AND_INT T4.Y, PS, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
-; EG-NEXT:     SUB_INT T1.W, PV.W, T2.Y,
-; EG-NEXT:     SUBB_UINT * T2.W, PV.Z, T2.Y,
-; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T2.X, PV.W, PS,
-; EG-NEXT:     CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
-; EG-NEXT:     CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
-; EG-NEXT:     CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     SETGT_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT:     CNDE_INT T3.Y, PS, 0.0, PV.W,
+; EG-NEXT:     SUB_INT T2.Z, literal.x, T1.W,
+; EG-NEXT:     LSHL T1.W, PV.Z, PV.Y,
+; EG-NEXT:     AND_INT * T3.W, T4.W, literal.y,
+; EG-NEXT:    150(2.101948e-43), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
+; EG-NEXT:     AND_INT T2.Y, PV.Z, literal.x,
+; EG-NEXT:     SUB_INT T3.Z, literal.y, T0.Y,
+; EG-NEXT:     NOT_INT T4.W, T4.W,
+; EG-NEXT:     LSHR * T6.W, T1.Z, 1,
+; EG-NEXT:    32(4.484155e-44), 150(2.101948e-43)
+; EG-NEXT:     BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
+; EG-NEXT:     ADD_INT T0.Y, T0.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
+; EG-NEXT:     AND_INT * T4.W, PV.Z, literal.y,
+; EG-NEXT:    -127(nan), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
+; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT:     CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
+; EG-NEXT:     CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
-; EG-NEXT:     AND_INT T3.Y, KC0[3].W, literal.y,
-; EG-NEXT:     CNDE_INT T2.Z, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T1.W, PS, PV.Y, PV.Z,
-; EG-NEXT:     ASHR * T2.W, KC0[3].Z, literal.z,
-; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     CNDE_INT T2.X, T5.W, PS, PV.W,
+; EG-NEXT:     ASHR T1.Y, KC0[3].Z, literal.x,
+; EG-NEXT:     CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T1.W, PV.Z, PV.X, T1.X,
+; EG-NEXT:     ASHR * T2.W, KC0[4].X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
-; EG-NEXT:     XOR_INT T1.Y, PV.W, PS,
+; EG-NEXT:     XOR_INT T2.Y, PV.W, PS,
 ; EG-NEXT:     XOR_INT T0.Z, PV.Z, PS,
-; EG-NEXT:     OR_INT T0.W, PV.Y, literal.y,
-; EG-NEXT:     SUB_INT * T1.W, literal.z, PV.X,
-; EG-NEXT:    23(3.222986e-44), 8388608(1.175494e-38)
+; EG-NEXT:     XOR_INT T1.W, PV.X, PV.Y,
+; EG-NEXT:     XOR_INT * T3.W, T3.Y, PV.Y,
+; EG-NEXT:     SUB_INT T3.Y, PS, T1.Y,
+; EG-NEXT:     SUBB_UINT T1.Z, PV.W, T1.Y,
+; EG-NEXT:     SUB_INT T3.W, PV.Z, T2.W,
+; EG-NEXT:     SUBB_UINT * T4.W, PV.Y, T2.W,
+; EG-NEXT:     SUB_INT T4.Y, PV.W, PS,
+; EG-NEXT:     SUB_INT T0.Z, PV.Y, PV.Z,
+; EG-NEXT:     BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
+; EG-NEXT:     AND_INT * T4.W, KC0[3].Y, literal.y,
+; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
+; EG-NEXT:     SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT:     ADD_INT T3.Y, PV.W, literal.x,
+; EG-NEXT:     OR_INT T1.Z, PS, literal.y,
+; EG-NEXT:     BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
+; EG-NEXT:     ADD_INT * T4.W, PV.W, literal.w,
+; EG-NEXT:    -127(nan), 8388608(1.175494e-38)
+; EG-NEXT:    23(3.222986e-44), -150(nan)
+; EG-NEXT:     AND_INT T1.X, KC0[3].W, literal.x,
+; EG-NEXT:     ADD_INT T5.Y, PV.W, literal.y,
+; EG-NEXT:     SUB_INT T2.Z, literal.z, T3.W,
+; EG-NEXT:     NOT_INT T3.W, PS,
+; EG-NEXT:     LSHR * T5.W, PV.Z, 1,
+; EG-NEXT:    8388607(1.175494e-38), -150(nan)
 ; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T4.X, KC0[3].Y, literal.x,
-; EG-NEXT:     AND_INT T3.Y, PS, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
-; EG-NEXT:     SUB_INT T1.W, PV.Z, T2.W,
-; EG-NEXT:     SUBB_UINT * T3.W, PV.Y, T2.W,
-; EG-NEXT:    8388607(1.175494e-38), 32(4.484155e-44)
-; EG-NEXT:     SUB_INT T5.X, PV.W, PS,
-; EG-NEXT:     SETGT_INT T0.Y, 0.0, T0.Y,
-; EG-NEXT:     CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
-; EG-NEXT:     OR_INT T1.W, PV.X, literal.x,
-; EG-NEXT:     ADD_INT * T3.W, T3.X, literal.y,
-; EG-NEXT:    8388608(1.175494e-38), -150(nan)
-; EG-NEXT:     ADD_INT T4.X, T3.X, literal.x,
-; EG-NEXT:     SUB_INT T3.Y, literal.y, T3.X,
-; EG-NEXT:     AND_INT T2.Z, PS, literal.z,
-; EG-NEXT:     NOT_INT T4.W, PS,
-; EG-NEXT:     LSHR * T5.W, PV.W, 1,
-; EG-NEXT:    -127(nan), 150(2.101948e-43)
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T4.Y, T1.W, PV.Z,
-; EG-NEXT:     AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
-; EG-NEXT:     AND_INT * T3.W, PV.Y, literal.x,
+; EG-NEXT:     BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
+; EG-NEXT:     AND_INT T6.Y, PV.Z, literal.x,
+; EG-NEXT:     AND_INT T3.Z, PV.Y, literal.y,
+; EG-NEXT:     OR_INT T3.W, PV.X, literal.z,
+; EG-NEXT:     AND_INT * T5.W, T4.W, literal.y,
+; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
+; EG-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
+; EG-NEXT:     LSHL T7.Y, T1.Z, PS,
+; EG-NEXT:     AND_INT T1.Z, T4.W, literal.x,
+; EG-NEXT:     LSHL T4.W, PV.W, PV.Z,
+; EG-NEXT:     AND_INT * T5.W, T5.Y, literal.x,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     ADD_INT T6.X, T1.X, literal.x,
-; EG-NEXT:     CNDE_INT T3.Y, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT * T3.Z, PV.Z, PV.Y, 0.0,
-; EG-NEXT:    -150(nan), 0(0.000000e+00)
-; EG-NEXT:    ALU clause starting at 108:
-; EG-NEXT:     CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
-; EG-NEXT:     SETGT_INT * T3.W, T4.X, literal.x,
+; EG-NEXT:     CNDE_INT T3.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
+; EG-NEXT:     CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
+; EG-NEXT:    ALU clause starting at 106:
+; EG-NEXT:     CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     SETGT_INT * T7.W, T3.Y, literal.x,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T3.X, PS, 0.0, PV.W,
-; EG-NEXT:     CNDE_INT T3.Y, PS, T3.Y, T3.Z,
-; EG-NEXT:     AND_INT T2.Z, T6.X, literal.x,
-; EG-NEXT:     NOT_INT T1.W, T6.X,
-; EG-NEXT:     LSHR * T3.W, T0.W, 1,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     ASHR T7.X, KC0[3].Y, literal.x,
-; EG-NEXT:     ADD_INT T4.Y, T1.X, literal.y,
-; EG-NEXT:     BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
-; EG-NEXT:     LSHL T0.W, T0.W, PV.Z,
-; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT:     CNDE_INT T1.X, PS, 0.0, PV.W,
+; EG-NEXT:     CNDE_INT T6.Y, PS, T2.Z, T8.Y,
+; EG-NEXT:     SUB_INT T1.Z, literal.x, T0.W,
+; EG-NEXT:     NOT_INT T6.W, T5.Y,
+; EG-NEXT:     LSHR * T7.W, T3.W, 1,
+; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
+; EG-NEXT:     ASHR T2.X, KC0[3].Y, literal.x,
+; EG-NEXT:     ADD_INT T5.Y, T0.W, literal.y,
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
+; EG-NEXT:     AND_INT * T3.W, PV.Z, literal.z,
 ; EG-NEXT:    31(4.344025e-44), -127(nan)
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T1.X, PS, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T5.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     SETGT_INT T2.Z, PV.Y, literal.x,
-; EG-NEXT:     XOR_INT T0.W, T3.Y, PV.X,
-; EG-NEXT:     XOR_INT * T1.W, T3.X, PV.X,
+; EG-NEXT:     CNDE_INT T4.X, PS, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
+; EG-NEXT:     SETGT_INT T1.Z, PV.Y, literal.x,
+; EG-NEXT:     XOR_INT T0.W, T6.Y, PV.X,
+; EG-NEXT:     XOR_INT * T3.W, T1.X, PV.X,
 ; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; EG-NEXT:     SUB_INT T3.X, PS, T7.X,
-; EG-NEXT:     SUBB_UINT T3.Y, PV.W, T7.X,
-; EG-NEXT:     CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
-; EG-NEXT:     CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
-; EG-NEXT:     ASHR * T3.W, KC0[3].W, literal.x,
+; EG-NEXT:     SUB_INT T1.X, PS, T2.X,
+; EG-NEXT:     SUBB_UINT T6.Y, PV.W, T2.X,
+; EG-NEXT:     CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
+; EG-NEXT:     CNDE_INT T3.W, PV.Z, PV.X, T3.X,
+; EG-NEXT:     ASHR * T4.W, KC0[3].W, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     XOR_INT T1.X, PV.W, PS,
-; EG-NEXT:     XOR_INT T5.Y, PV.Z, PS,
-; EG-NEXT:     SUB_INT T0.Z, PV.X, PV.Y,
-; EG-NEXT:     SETGT_INT T1.W, 0.0, T4.X, BS:VEC_021/SCL_122
-; EG-NEXT:     CNDE_INT * T6.W, T0.Y, T5.X, 0.0,
-; EG-NEXT:     SETGT_INT T0.X, 0.0, T0.X,
+; EG-NEXT:     XOR_INT T3.X, PV.W, PS,
+; EG-NEXT:     XOR_INT T7.Y, PV.Z, PS,
+; EG-NEXT:     SUB_INT T1.Z, PV.X, PV.Y,
+; EG-NEXT:     SETGT_INT T3.W, 0.0, T3.Y,
+; EG-NEXT:     CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
+; EG-NEXT:     SETGT_INT T1.X, 0.0, T0.Y,
 ; EG-NEXT:     CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
-; EG-NEXT:     SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
-; EG-NEXT:     SUB_INT T2.W, PV.Y, T3.W,
-; EG-NEXT:     SUBB_UINT * T4.W, PV.X, T3.W,
-; EG-NEXT:     SUB_INT T3.X, PV.W, PS,
-; EG-NEXT:     SETGT_INT T1.Y, 0.0, T4.Y,
-; EG-NEXT:     CNDE_INT T6.Z, T0.Y, PV.Z, 0.0,
-; EG-NEXT:     SUB_INT T0.W, T0.W, T7.X, BS:VEC_021/SCL_122
-; EG-NEXT:     CNDE_INT * T4.W, PV.X, T2.X, 0.0,
-; EG-NEXT:     CNDE_INT T6.X, T1.W, PV.W, 0.0,
-; EG-NEXT:     CNDE_INT T4.Y, PV.Y, PV.X, 0.0,
-; EG-NEXT:     SUB_INT T0.W, T1.Z, T2.Y,
-; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT:     SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     SUB_INT T1.W, PV.Y, T4.W,
+; EG-NEXT:     SUBB_UINT * T5.W, PV.X, T4.W,
+; EG-NEXT:     SUB_INT T4.X, PV.W, PS,
+; EG-NEXT:     SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
+; EG-NEXT:     CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
+; EG-NEXT:     SUB_INT T0.W, T0.W, T2.X,
+; EG-NEXT:     CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
+; EG-NEXT:     CNDE_INT T6.X, T3.W, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
+; EG-NEXT:     SUB_INT T0.W, T2.Y, T2.W,
+; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T4.Z, T0.X, PV.W, 0.0,
-; EG-NEXT:     SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
-; EG-NEXT:     CNDE_INT T4.X, T1.Y, PV.W, 0.0,
+; EG-NEXT:     CNDE_INT T1.Z, T1.X, PV.W, 0.0,
+; EG-NEXT:     SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
+; EG-NEXT:     CNDE_INT T1.X, T0.Y, PV.W, 0.0,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %conv = fptoui <4 x float> %x to <4 x i64>
   store <4 x i64> %conv, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 7a0450761e1f1..3a867879bb809 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -228,23 +228,23 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.z,
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T0.Y, T1.X, literal.y,
 ; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
 ; R600-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, T0.X,
 ; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
 ; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.X, literal.x,
+; R600-NEXT:     MUL_IEEE T3.W, PV.Y, literal.x,
+; R600-NEXT:     CNDE_INT * T0.W, T0.W, PV.X, T2.W,
 ; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T1.Z, T1.Y, T3.X, PS,
-; R600-NEXT:     CNDE_INT T0.W, T1.W, PV.W, T1.X,
+; R600-NEXT:     CNDE_INT T1.Z, T1.W, PS, T1.X,
+; R600-NEXT:     CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
 ; R600-NEXT:     LSHL * T1.W, PV.Z, literal.x,
 ; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
 ; R600-NEXT:     ADD_INT T1.W, PS, literal.x,
-; R600-NEXT:     CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
 ; R600-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; R600-NEXT:     MUL_IEEE T0.W, PS, PV.W,
 ; R600-NEXT:     SETGT * T1.W, literal.x, KC0[2].Z,
@@ -258,65 +258,63 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; CM-LABEL: s_exp_f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 64, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 62, @4, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    ALU clause starting at 4:
 ; CM-NEXT:     AND_INT * T0.W, KC0[2].Z, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[2].Z, -PV.W,
-; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T2.Z, PV.W,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT:     RNDNE T1.Z, PV.W,
 ; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
 ; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT:     ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT:     FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
 ; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT:     TRUNC T1.Z, T1.Z,
+; CM-NEXT:     ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT:     MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT:    2130706432(1.701412e+38), -254(nan)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
+; CM-NEXT:     ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
 ; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
-; CM-NEXT:     LSHL * T1.W, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
+; CM-NEXT:     LSHL * T0.W, PV.Y, literal.x,
 ; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
 ; CM-NEXT:     ADD_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
+; CM-NEXT:     CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
 ; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; CM-NEXT:     MUL_IEEE T0.Z, PV.W, PV.Z,
 ; CM-NEXT:     SETGT * T0.W, literal.x, KC0[2].Z,
@@ -610,105 +608,105 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].X, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
 ; R600-NEXT:     ADD * T1.W, KC0[3].X, -PV.W,
-; R600-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.y,
-; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.z,
-; R600-NEXT:    -4096(nan), 967029397(3.122284e-04)
-; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T1.Z, PS,
+; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT:     RNDNE T0.Z, PS,
 ; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T2.W, KC0[2].W, -PV.Z,
-; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Z, T0.Z, literal.y,
+; R600-NEXT:     AND_INT * T2.W, KC0[2].W, literal.y,
+; R600-NEXT:    1069064192(1.442383e+00), -4096(nan)
+; R600-NEXT:     ADD T1.Z, KC0[2].W, -PS,
 ; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
 ; R600-NEXT:     ADD * T1.W, T3.W, -PV.Z,
+; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
+; R600-NEXT:     ADD T2.Z, PS, PV.W,
+; R600-NEXT:     MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT:     MUL_IEEE * T1.W, T2.W, literal.y,
 ; R600-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
-; R600-NEXT:     ADD T3.Z, PS, PV.W,
-; R600-NEXT:     RNDNE T0.W, PV.Z,
-; R600-NEXT:     MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
-; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     TRUNC T0.Y, T1.Z,
-; R600-NEXT:     MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD T1.W, T2.Z, -PV.W, BS:VEC_201
+; R600-NEXT:     RNDNE T0.Y, PS,
+; R600-NEXT:     MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT:     TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212
 ; R600-NEXT:     EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T0.Z, PV.W, PV.Z,
-; R600-NEXT:     FLT_TO_INT T1.W, PV.Y,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.x,
-; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T1.Z, PS, literal.x,
-; R600-NEXT:     SETGT_UINT T3.W, PV.W, literal.y,
-; R600-NEXT:     EXP_IEEE * T0.Y, PV.Z,
-; R600-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T1.X, PV.W, T2.W, PV.Z,
-; R600-NEXT:     MUL_IEEE T1.Y, PS, literal.x,
-; R600-NEXT:     MAX_INT T0.Z, T1.W, literal.y,
-; R600-NEXT:     MIN_INT T2.W, T1.W, literal.z,
-; R600-NEXT:     TRUNC * T0.W, T0.W,
+; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
+; R600-NEXT:     FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T2.W, literal.y, PV.Z,
+; R600-NEXT:     ADD * T1.W, T1.W, -PV.Y,
+; R600-NEXT:    209715200(1.972152e-31), 967029397(3.122284e-04)
+; R600-NEXT:     ADD T1.Z, PS, PV.W,
+; R600-NEXT:     MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT:     SETGT_UINT * T1.W, PV.Y, literal.y,
+; R600-NEXT:    209715200(1.972152e-31), -229(nan)
+; R600-NEXT:     CNDE_INT T0.Z, PS, PV.W, T0.Z,
+; R600-NEXT:     SETGT_INT T0.W, T1.Y, literal.x,
+; R600-NEXT:     EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT:    -127(nan), 0(0.000000e+00)
+; R600-NEXT:     CNDE_INT T0.Z, PV.W, PV.Z, T0.X,
+; R600-NEXT:     MAX_INT T2.W, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.y,
+; R600-NEXT:    -330(nan), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
+; R600-NEXT:     ADD_INT T1.Z, T1.Y, literal.z,
+; R600-NEXT:     MIN_INT T2.W, T1.Y, literal.w,
+; R600-NEXT:     TRUNC * T4.W, T0.Y,
+; R600-NEXT:    209715200(1.972152e-31), 204(2.858649e-43)
+; R600-NEXT:    102(1.429324e-43), 381(5.338947e-43)
+; R600-NEXT:     FLT_TO_INT T3.X, PS,
+; R600-NEXT:     ADD_INT T0.Y, PV.W, literal.x,
+; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T2.W, T1.Y, literal.z,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, PV.Y, PV.Z,
+; R600-NEXT:    -254(nan), -127(nan)
+; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T4.X, T1.X, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T1.Z, T0.W, PS, T1.Y,
+; R600-NEXT:     CNDE_INT T0.W, PV.W, PV.Z, PV.Y,
+; R600-NEXT:     MAX_INT * T1.W, PV.X, literal.y,
 ; R600-NEXT:    2130706432(1.701412e+38), -330(nan)
-; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T2.X, PS,
-; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T0.W, T1.W, literal.z,
-; R600-NEXT:     SETGT_UINT * T2.W, T1.W, literal.w,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T3.X, T1.W, literal.x,
-; R600-NEXT:     CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT:     SETGT_INT T0.Z, T1.W, literal.x,
-; R600-NEXT:     MUL_IEEE T0.W, T0.X, literal.y,
-; R600-NEXT:     MUL_IEEE * T4.W, T0.Y, literal.y,
-; R600-NEXT:    -127(nan), 209715200(1.972152e-31)
-; R600-NEXT:     MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T4.Y, PV.W, literal.x,
-; R600-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
-; R600-NEXT:     CNDE_INT T3.W, T3.W, PV.X, T2.Y,
-; R600-NEXT:     MAX_INT * T5.W, T2.X, literal.y,
-; R600-NEXT:    209715200(1.972152e-31), -330(nan)
-; R600-NEXT:     SETGT_INT T3.X, T1.W, literal.x,
-; R600-NEXT:     ADD_INT T2.Y, PS, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T2.X, literal.z,
-; R600-NEXT:     SETGT_UINT * T1.W, T2.X, literal.w,
+; R600-NEXT:     SETGT_INT T0.X, T1.Y, literal.x,
+; R600-NEXT:     ADD_INT T0.Y, PS, literal.y,
+; R600-NEXT:     ADD_INT T2.Z, T3.X, literal.z,
+; R600-NEXT:     SETGT_UINT * T1.W, T3.X, literal.w,
 ; R600-NEXT:    127(1.779649e-43), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     MIN_INT * T5.W, T2.X, literal.x,
+; R600-NEXT:     MIN_INT * T4.W, T3.X, literal.x,
 ; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
 ; R600-NEXT:     ADD_INT T5.X, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
-; R600-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
-; R600-NEXT:     CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
-; R600-NEXT:     SETGT_INT * T6.W, T2.X, literal.y,
+; R600-NEXT:     ADD_INT T1.Y, T3.X, literal.y,
+; R600-NEXT:     SETGT_UINT T3.Z, T3.X, literal.z,
+; R600-NEXT:     CNDE_INT T4.W, T1.W, T0.Y, T2.Z,
+; R600-NEXT:     SETGT_INT * T5.W, T3.X, literal.y,
 ; R600-NEXT:    -254(nan), -127(nan)
 ; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T2.X,
-; R600-NEXT:     CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT:     SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
-; R600-NEXT:     CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
-; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T0.X, T0.Z, PS, T0.X,
-; R600-NEXT:     LSHL T3.Y, PV.W, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
-; R600-NEXT:     CNDE_INT T0.W, T1.W, T4.X, T4.W,
-; R600-NEXT:     MUL_IEEE * T1.W, T1.Y, literal.y,
+; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T3.X,
+; R600-NEXT:     CNDE_INT T0.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT:     SETGT_INT T2.Z, T3.X, literal.x,
+; R600-NEXT:     CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE * T4.W, T2.Y, literal.y,
+; R600-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212
+; R600-NEXT:     LSHL T1.Y, PV.W, literal.x,
+; R600-NEXT:     CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
+; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, T2.X, T3.W,
 ; R600-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T2.X, T3.Z, T1.Y, PS,
-; R600-NEXT:     CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
-; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT:     CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201
+; R600-NEXT:     LSHL T1.Z, PV.Z, literal.x,
 ; R600-NEXT:     ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     CNDE_INT * T1.W, T3.X, PV.X, T1.X,
+; R600-NEXT:     CNDE_INT * T1.W, T0.X, T0.Z, PV.X,
 ; R600-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
 ; R600-NEXT:     MUL_IEEE T1.Y, PS, PV.W,
-; R600-NEXT:     SETGT T1.Z, literal.x, KC0[3].X,
+; R600-NEXT:     SETGT T0.Z, literal.x, KC0[3].X,
 ; R600-NEXT:     ADD_INT * T0.W, PV.Z, literal.y,
 ; R600-NEXT:    -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
 ; R600-NEXT:    ALU clause starting at 101:
-; R600-NEXT:     CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
+; R600-NEXT:     CNDE_INT * T1.W, T2.Z, T1.X, T0.Y,
 ; R600-NEXT:     MUL_IEEE T0.Y, PV.W, T0.W,
-; R600-NEXT:     SETGT T0.Z, literal.x, KC0[2].W,
-; R600-NEXT:     CNDE T0.W, T1.Z, T1.Y, 0.0,
+; R600-NEXT:     SETGT T1.Z, literal.x, KC0[2].W,
+; R600-NEXT:     CNDE T0.W, T0.Z, T1.Y, 0.0,
 ; R600-NEXT:     SETGT * T1.W, KC0[3].X, literal.y,
 ; R600-NEXT:    -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
 ; R600-NEXT:     CNDE T1.Y, PS, PV.W, literal.x,
@@ -721,118 +719,116 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ;
 ; CM-LABEL: s_exp_v2f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 100, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 18, @105, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 98, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 18, @103, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    ALU clause starting at 4:
 ; CM-NEXT:     AND_INT * T0.W, KC0[2].W, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[2].W, -PV.W,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT:     AND_INT * T2.W, KC0[3].X, literal.z,
+; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT:    -4096(nan), 0(0.000000e+00)
+; CM-NEXT:     ADD T1.Y, KC0[3].X, -PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
 ; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T0.Y, PV.W,
-; CM-NEXT:     AND_INT T2.Z, KC0[3].X, literal.x,
-; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
-; CM-NEXT:    -4096(nan), 1069064192(1.442383e+00)
 ; CM-NEXT:     MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT:     FLT_TO_INT T1.Z, PV.Y,
-; CM-NEXT:     ADD * T0.W, KC0[3].X, -PV.Z,
+; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
 ; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
-; CM-NEXT:     ADD T1.X, T0.Z, -T2.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
-; CM-NEXT:     MAX_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT:     RNDNE * T1.W, PV.Y,
-; CM-NEXT:    967029397(3.122284e-04), -330(nan)
-; CM-NEXT:     TRUNC T2.X, PV.W,
-; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
-; CM-NEXT:     ADD * T0.W, PV.X, T0.X,
-; CM-NEXT:    204(2.858649e-43), 1069064192(1.442383e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T0.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT:     ADD_INT T1.X, T1.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
-; CM-NEXT:     ADD T0.Z, T1.Y, -T1.W,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT:    102(1.429324e-43), 967029397(3.122284e-04)
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T3.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT:     SETGT_UINT T2.Z, T1.Z, literal.z,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT:    -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     TRUNC T1.X, T1.Z,
+; CM-NEXT:     RNDNE T2.Y, PV.W,
+; CM-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X, T1.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT:     MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
+; CM-NEXT:     ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     TRUNC T1.Z, T2.Y,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
 ; CM-NEXT:     EXP_IEEE T0.X (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Y, T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
-; CM-NEXT:     FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.x,
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T1.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT:     MUL_IEEE T3.Z, PV.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T2.X, PV.W, T0.W, PV.Z,
+; CM-NEXT:     FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT:     MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Z, T1.Y, T1.X, T0.W,
+; CM-NEXT:     SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    209715200(1.972152e-31), -127(nan)
+; CM-NEXT:     CNDE_INT T1.X, PV.W, PV.Z, T0.X,
 ; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
-; CM-NEXT:     MAX_INT * T0.W, T0.Z, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), -330(nan)
-; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, T0.Z, literal.y,
-; CM-NEXT:     SETGT_UINT T4.Z, T0.Z, literal.z,
-; CM-NEXT:     MUL_IEEE * T0.W, T0.Y, literal.w,
+; CM-NEXT:     SETGT_UINT T1.Z, PV.X, literal.y,
+; CM-NEXT:     MAX_INT * T1.W, T0.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
+; CM-NEXT:    -330(nan), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T4.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y,
+; CM-NEXT:     SETGT_INT * T1.W, T2.X, literal.z,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T6.X, PV.W, literal.x,
-; CM-NEXT:     MIN_INT T4.Y, T0.Z, literal.y,
-; CM-NEXT:     CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT:     MIN_INT T1.Y, T1.Z, literal.x,
-; CM-NEXT:     ADD_INT T5.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    381(5.338947e-43), -254(nan)
 ; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T7.X, T1.W, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T4.Y, T0.Z, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT T4.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT * T2.W, T1.Y, PV.X, PV.Y,
+; CM-NEXT:    2130706432(1.701412e+38), -330(nan)
+; CM-NEXT:     CNDE_INT T0.X, T0.W, PV.W, T0.Z,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.x,
+; CM-NEXT:     ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT:     CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT:    -254(nan), -127(nan)
+; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T5.X, T0.Y, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T2.X,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT:     MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T5.X, T2.W, PV.W, T0.Y,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     SETGT_INT T1.Y, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.X, T0.Y, T1.Z,
+; CM-NEXT:     MUL_IEEE * T1.W, T5.X, literal.y,
+; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T5.X, T3.Z, T5.X, PV.W,
 ; CM-NEXT:     LSHL T0.Y, PV.Z, literal.x,
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T1.X, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT:     MUL_IEEE * T1.W, T2.Y, literal.y,
+; CM-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T0.W, T2.Y, PV.W,
 ; CM-NEXT:     LSHL T2.Y, PV.Z, literal.x,
 ; CM-NEXT:     ADD_INT * T0.Z, PV.Y, literal.y,
 ; CM-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
-; CM-NEXT:    ALU clause starting at 105:
-; CM-NEXT:     CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, T0.Z,
+; CM-NEXT:    ALU clause starting at 103:
+; CM-NEXT:     CNDE_INT * T0.W, T2.X, T4.X, T5.X,
+; CM-NEXT:     MUL_IEEE T2.X, PV.W, T0.Z,
 ; CM-NEXT:     SETGT T0.Y, literal.x, KC0[3].X,
 ; CM-NEXT:     ADD_INT T0.Z, T2.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212
 ; CM-NEXT:    -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
 ; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
 ; CM-NEXT:     SETGT T1.Y, literal.x, KC0[2].W,
@@ -1215,8 +1211,8 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ;
 ; R600-LABEL: s_exp_v3f32:
 ; R600:       ; %bb.0:
-; R600-NEXT:    ALU 100, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 69, @107, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 69, @106, KC0[CB0:0-32], KC1[]
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; R600-NEXT:    CF_END
@@ -1224,69 +1220,68 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; R600-NEXT:    ALU clause starting at 6:
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].Y, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.W, KC0[3].Y, -PV.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE T1.W, PV.W, literal.x,
+; R600-NEXT:     ADD * T2.W, KC0[3].Y, -PV.W,
 ; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T3.W, PS,
-; R600-NEXT:     MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT:     RNDNE * T3.W, PV.W,
+; R600-NEXT:     TRUNC T4.W, PV.W,
+; R600-NEXT:     MUL_IEEE * T5.W, T2.W, literal.x,
 ; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT:     TRUNC * T4.W, PV.W,
+; R600-NEXT:     MULADD_IEEE T2.W, T2.W, literal.x, PS,
+; R600-NEXT:     FLT_TO_INT * T4.W, PV.W,
 ; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T0.Z, PS,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T0.W, PS, PV.W,
-; R600-NEXT:     MAX_INT * T1.W, PV.Z, literal.x,
-; R600-NEXT:    -330(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T0.Y, PS, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, T0.Z, literal.y,
-; R600-NEXT:     SETGT_UINT T1.W, T0.Z, literal.z,
-; R600-NEXT:     EXP_IEEE * T0.X, PV.W,
+; R600-NEXT:     MAX_INT T0.Z, PS, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.y, PV.W,
+; R600-NEXT:     ADD * T1.W, T1.W, -T3.W,
+; R600-NEXT:    -330(nan), 967029397(3.122284e-04)
+; R600-NEXT:     ADD T0.Y, PS, PV.W,
+; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.x,
+; R600-NEXT:     ADD_INT T0.W, T4.W, literal.y,
+; R600-NEXT:     SETGT_UINT * T1.W, T4.W, literal.z,
 ; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
 ; R600-NEXT:    -229(nan), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T0.W, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.y,
-; R600-NEXT:    -127(nan), 209715200(1.972152e-31)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT:     MIN_INT T3.W, T0.Z, literal.y,
-; R600-NEXT:     AND_INT * T4.W, KC0[3].W, literal.z,
-; R600-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
-; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T1.X, T0.X, literal.x,
-; R600-NEXT:     ADD T1.Y, KC0[3].W, -PS,
-; R600-NEXT:     ADD_INT T2.Z, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT * T5.W, T0.Z, literal.w,
-; R600-NEXT:    2130706432(1.701412e+38), -254(nan)
+; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT:     SETGT_INT T0.W, T4.W, literal.x,
+; R600-NEXT:     EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT:    -127(nan), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T1.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T0.Y, PV.W, PV.Z, T4.W,
+; R600-NEXT:     MIN_INT T0.Z, T4.W, literal.y,
+; R600-NEXT:     AND_INT T2.W, KC0[3].W, literal.z,
+; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.w,
+; R600-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; R600-NEXT:    -4096(nan), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     ADD T1.Y, KC0[3].W, -PV.W,
+; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT:     ADD_INT T5.W, T4.W, literal.z,
+; R600-NEXT:     SETGT_UINT * T6.W, T4.W, literal.w,
+; R600-NEXT:    209715200(1.972152e-31), -254(nan)
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T2.X, PS, PV.W, PV.Z,
-; R600-NEXT:     SETGT_INT T2.Y, T0.Z, literal.x,
+; R600-NEXT:     CNDE_INT T3.X, PS, PV.W, PV.Z,
+; R600-NEXT:     SETGT_INT T2.Y, T4.W, literal.x,
 ; R600-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.y,
-; R600-NEXT:     MUL_IEEE T3.W, T4.W, literal.z,
-; R600-NEXT:     MUL_IEEE * T6.W, PV.X, literal.w,
+; R600-NEXT:     MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212
 ; R600-NEXT:    127(1.779649e-43), 967029397(3.122284e-04)
-; R600-NEXT:    1069064192(1.442383e+00), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
-; R600-NEXT:     RNDNE T3.Y, PV.W,
-; R600-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; R600-NEXT:     CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
-; R600-NEXT:     CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
 ; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T0.X, T0.W, PS, T0.X,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, T2.X, T3.W,
+; R600-NEXT:     CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT:     RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z,
+; R600-NEXT:     CNDE_INT T0.W, T2.Y, T0.Y, T3.X, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE * T1.W, T1.X, literal.y,
+; R600-NEXT:    1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T1.X, T6.W, T1.X, PS,
 ; R600-NEXT:     LSHL T0.Y, PV.W, literal.x,
 ; R600-NEXT:     AND_INT T1.Z, KC0[3].Z, literal.y,
-; R600-NEXT:     MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD * T1.W, T3.W, -PV.Y,
+; R600-NEXT:     MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT:     ADD * T1.W, T4.W, -PV.Y,
 ; R600-NEXT:    23(3.222986e-44), -4096(nan)
 ; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
 ; R600-NEXT:     ADD T1.Y, PS, PV.W,
 ; R600-NEXT:     MUL_IEEE T0.Z, PV.Z, literal.x,
 ; R600-NEXT:     ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
+; R600-NEXT:     CNDE_INT * T1.W, T2.Y, T0.X, PV.X,
 ; R600-NEXT:    1069064192(1.442383e+00), 1065353216(1.000000e+00)
 ; R600-NEXT:     MUL_IEEE T0.X, PS, PV.W,
 ; R600-NEXT:     ADD T0.Y, KC0[3].Z, -T1.Z,
@@ -1300,12 +1295,12 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; R600-NEXT:     MUL_IEEE * T1.W, PS, literal.z,
 ; R600-NEXT:    -1026650416(-1.032789e+02), 967029397(3.122284e-04)
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T1.X, literal.y,
 ; R600-NEXT:     MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
 ; R600-NEXT:     FLT_TO_INT T0.W, PV.Z,
 ; R600-NEXT:     MIN_INT * T2.W, PV.Y, literal.w,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; R600-NEXT:    1069064192(1.442383e+00), 381(5.338947e-43)
 ; R600-NEXT:     ADD_INT T4.X, PS, literal.x,
 ; R600-NEXT:     MAX_INT T0.Y, PV.W, literal.y,
@@ -1323,7 +1318,7 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
 ; R600-NEXT:     ADD_INT * T6.X, T0.W, literal.x,
 ; R600-NEXT:    -127(nan), 0(0.000000e+00)
-; R600-NEXT:    ALU clause starting at 107:
+; R600-NEXT:    ALU clause starting at 106:
 ; R600-NEXT:     SETGT_UINT T0.Y, T0.W, literal.x,
 ; R600-NEXT:     CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
 ; R600-NEXT:     SETGT_INT T2.W, T0.W, literal.y,
@@ -1339,25 +1334,25 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; R600-NEXT:     SETGT_UINT T5.X, T1.Y, literal.x,
 ; R600-NEXT:     CNDE_INT T4.Y, PS, PV.Z, PV.W,
 ; R600-NEXT:     MAX_INT T0.Z, T1.Y, literal.y,
-; R600-NEXT:     MUL_IEEE T4.W, T1.Z, literal.z,
-; R600-NEXT:     MUL_IEEE * T5.W, PV.Y, literal.w,
+; R600-NEXT:     MUL_IEEE T4.W, PV.Y, literal.z,
+; R600-NEXT:     MUL_IEEE * T5.W, T1.Z, literal.w,
 ; R600-NEXT:    254(3.559298e-43), -330(nan)
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT:     CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
-; R600-NEXT:     MUL_IEEE T3.Y, PV.W, literal.x,
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
+; R600-NEXT:     MUL_IEEE T6.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122
 ; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
 ; R600-NEXT:     ADD_INT T3.W, T1.Y, literal.z,
-; R600-NEXT:     SETGT_UINT * T5.W, T1.Y, literal.w,
+; R600-NEXT:     SETGT_UINT * T4.W, T1.Y, literal.w,
 ; R600-NEXT:    2130706432(1.701412e+38), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
 ; R600-NEXT:     CNDE_INT T8.X, PS, PV.Z, PV.W,
 ; R600-NEXT:     SETGT_INT T5.Y, T1.Y, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T2.W, T2.W, PV.X, T1.Z,
+; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.Y, T1.Z,
+; R600-NEXT:     CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212
 ; R600-NEXT:     LSHL * T3.W, T4.Y, literal.y,
 ; R600-NEXT:    -127(nan), 23(3.222986e-44)
 ; R600-NEXT:     ADD_INT T6.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT T0.Y, T0.W, PV.Z, PV.W,
 ; R600-NEXT:     CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
 ; R600-NEXT:     CNDE_INT T0.W, T5.X, T7.X, T4.X,
 ; R600-NEXT:     SETGT_INT * T2.W, T1.Y, literal.y,
@@ -1365,18 +1360,18 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; R600-NEXT:     CNDE_INT T4.X, PS, PV.Z, PV.W,
 ; R600-NEXT:     MUL_IEEE T0.Y, PV.Y, PV.X,
 ; R600-NEXT:     SETGT T0.Z, literal.x, KC0[3].Z,
-; R600-NEXT:     CNDE_INT T0.W, T5.W, T2.Y, T1.W,
-; R600-NEXT:     MUL_IEEE * T1.W, T3.X, literal.y,
+; R600-NEXT:     MUL_IEEE T0.W, T2.Y, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T4.W, T3.X, T1.W,
 ; R600-NEXT:    -1026650416(-1.032789e+02), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T3.X, T5.X, T3.X, PS,
-; R600-NEXT:     CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
+; R600-NEXT:     CNDE_INT T1.X, T5.Y, PS, T1.X,
+; R600-NEXT:     CNDE_INT T1.Y, T5.X, T2.Y, PV.W,
 ; R600-NEXT:     CNDE T0.Z, PV.Z, PV.Y, 0.0,
 ; R600-NEXT:     SETGT T0.W, KC0[3].Z, literal.x,
 ; R600-NEXT:     LSHL * T1.W, PV.X, literal.y,
 ; R600-NEXT:    1118925336(8.872284e+01), 23(3.222986e-44)
-; R600-NEXT:     ADD_INT T1.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T3.X, PS, literal.x,
 ; R600-NEXT:     CNDE T0.Y, PV.W, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
+; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.X, PV.Y,
 ; R600-NEXT:     CNDE T0.W, T2.X, T0.X, 0.0,
 ; R600-NEXT:     SETGT * T1.W, KC0[3].Y, literal.z,
 ; R600-NEXT:    1065353216(1.000000e+00), 2139095040(INF)
@@ -1397,197 +1392,193 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ;
 ; CM-LABEL: s_exp_v3f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 102, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 80, @109, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
+; CM-NEXT:    ALU 101, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 77, @108, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    ALU clause starting at 6:
 ; CM-NEXT:     AND_INT * T0.W, KC0[3].Y, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[3].Y, -PV.W,
-; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T2.Z, PV.W,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT:     RNDNE T1.Z, PV.W,
 ; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
 ; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT:     ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT:     FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
 ; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT:     TRUNC T1.Z, T1.Z,
+; CM-NEXT:     ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT:     MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT:    2130706432(1.701412e+38), -254(nan)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
+; CM-NEXT:     ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
 ; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T1.X, T2.X, T0.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
+; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Y, T2.Y, T3.X, PV.Z,
 ; CM-NEXT:     LSHL T0.Z, PV.Y, literal.x,
-; CM-NEXT:     AND_INT * T1.W, KC0[3].Z, literal.y,
+; CM-NEXT:     AND_INT * T0.W, KC0[3].Z, literal.y,
 ; CM-NEXT:    23(3.222986e-44), -4096(nan)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, literal.x,
 ; CM-NEXT:     ADD T1.Y, KC0[3].Z, -PV.W,
-; CM-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
-; CM-NEXT:    1069064192(1.442383e+00), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Y, PV.W, PV.Z,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT:     RNDNE * T0.W, PV.X,
-; CM-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T0.Z, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT * T1.W, T3.W, PV.X, PV.Y,
+; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT:     AND_INT * T1.W, KC0[3].W, literal.z,
+; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT:    -4096(nan), 0(0.000000e+00)
 ; CM-NEXT:     SETGT T1.X, literal.x, KC0[3].Y,
-; CM-NEXT:     TRUNC T2.Y, PV.W,
-; CM-NEXT:     AND_INT T1.Z, KC0[3].W, literal.y,
-; CM-NEXT:     MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
-; CM-NEXT:    -1026650416(-1.032789e+02), -4096(nan)
-; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT:     FLT_TO_INT T0.Z, PV.Y,
-; CM-NEXT:     ADD * T1.W, KC0[3].W, -PV.Z,
+; CM-NEXT:     ADD T2.Y, KC0[3].W, -PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y,
+; CM-NEXT:    -1026650416(-1.032789e+02), 1069064192(1.442383e+00)
+; CM-NEXT:     MULADD_IEEE T2.X, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212
 ; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
-; CM-NEXT:     ADD T0.X, T0.X, -T0.W,
-; CM-NEXT:     MUL_IEEE T2.Y, PV.W, literal.x,
-; CM-NEXT:     MAX_INT T2.Z, PV.Z, literal.y,
-; CM-NEXT:     RNDNE * T0.W, PV.Y,
-; CM-NEXT:    967029397(3.122284e-04), -330(nan)
-; CM-NEXT:     TRUNC T3.X, PV.W,
-; CM-NEXT:     ADD_INT T3.Y, PV.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
-; CM-NEXT:     ADD * T1.W, PV.X, T2.X,
-; CM-NEXT:    204(2.858649e-43), 1069064192(1.442383e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
-; CM-NEXT:     ADD T1.Z, T1.Y, -T0.W,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT:    102(1.429324e-43), 967029397(3.122284e-04)
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T4.X, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT:     SETGT_UINT T2.Z, T0.Z, literal.z,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT:    -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     TRUNC T3.X, T1.Z,
+; CM-NEXT:     RNDNE T1.Y, PV.W,
+; CM-NEXT:     MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z,
+; CM-NEXT:     ADD * T2.W, PV.Y, PV.X,
+; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE T0.Y, T2.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT:     MULADD_IEEE T2.X, T1.W, literal.x, T0.Z,
+; CM-NEXT:     ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T0.Z, T3.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.y,
+; CM-NEXT:    967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, PV.Z, literal.y,
+; CM-NEXT:     TRUNC T1.Z, T1.Y,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
 ; CM-NEXT:     EXP_IEEE T1.X (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T1.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T1.Z, T1.W,
+; CM-NEXT:     EXP_IEEE T1.Y, T1.W,
+; CM-NEXT:     EXP_IEEE T1.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T1.W,
-; CM-NEXT:    ALU clause starting at 109:
-; CM-NEXT:     CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
-; CM-NEXT:     FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T2.X, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT:     MUL_IEEE T4.Z, PV.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T3.X, PV.W, T0.W, PV.Z,
-; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
-; CM-NEXT:     MAX_INT * T0.W, T3.Z, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), -330(nan)
-; CM-NEXT:     ADD_INT T6.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, T3.Z, literal.y,
-; CM-NEXT:     SETGT_UINT T5.Z, T3.Z, literal.z,
-; CM-NEXT:     MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT:     MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Z, T3.Y, T3.X, T0.W,
+; CM-NEXT:     SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    209715200(1.972152e-31), -127(nan)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE * T4.Y, PV.Y, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 108:
+; CM-NEXT:     SETGT_UINT T1.Z, T2.X, literal.x,
+; CM-NEXT:     MAX_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT:    -229(nan), -330(nan)
+; CM-NEXT:     ADD_INT T4.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T5.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y,
+; CM-NEXT:     SETGT_INT * T1.W, T2.X, literal.z,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T7.X, PV.W, literal.x,
-; CM-NEXT:     MIN_INT T4.Y, T3.Z, literal.y,
-; CM-NEXT:     CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     SETGT_INT * T2.W, T3.Z, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
 ; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
-; CM-NEXT:     MIN_INT T1.Y, T0.Z, literal.x,
-; CM-NEXT:     ADD_INT T6.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    381(5.338947e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T8.X, T1.W, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T4.Y, T3.Z, literal.x,
-; CM-NEXT:     ADD_INT T3.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T1.Y,
+; CM-NEXT:     MUL_IEEE T0.Y, T0.Y, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT:    2130706432(1.701412e+38), -330(nan)
+; CM-NEXT:     CNDE_INT T4.X, T0.W, PV.W, T0.Z,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT:     ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T6.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT:     CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT:    -254(nan), -127(nan)
+; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, T1.W, PV.W, T2.X,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT:     MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T6.X, T2.W, PV.W, T1.Z,
-; CM-NEXT:     LSHL T5.Y, PV.Z, literal.x,
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T2.X, PV.W, T0.X,
-; CM-NEXT:     LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     SETGT_INT T2.Y, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.Y, T1.Z,
+; CM-NEXT:     MUL_IEEE * T1.W, T7.X, literal.y,
+; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T7.X, T3.Z, T7.X, PV.W,
+; CM-NEXT:     LSHL T1.Y, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.y,
+; CM-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T4.X, T0.W, T0.Y, PV.W,
+; CM-NEXT:     LSHL T0.Y, PV.Z, literal.x,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
+; CM-NEXT:     CNDE_INT * T0.W, T2.X, T5.X, PV.X,
 ; CM-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
 ; CM-NEXT:     MUL_IEEE T2.X, PV.W, PV.Z,
-; CM-NEXT:     SETGT T3.Y, literal.x, KC0[3].W,
+; CM-NEXT:     SETGT T1.Y, literal.x, KC0[3].W,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
+; CM-NEXT:     CNDE_INT * T0.W, T2.Y, T3.X, PV.X,
 ; CM-NEXT:    -1026650416(-1.032789e+02), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT:     SETGT T1.Y, literal.x, KC0[3].Z,
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, PV.Z,
+; CM-NEXT:     SETGT T0.Y, literal.x, KC0[3].Z,
 ; CM-NEXT:     CNDE T0.Z, PV.Y, PV.X, 0.0,
 ; CM-NEXT:     SETGT * T0.W, KC0[3].W, literal.y,
 ; CM-NEXT:    -1026650416(-1.032789e+02), 1118925336(8.872284e+01)
 ; CM-NEXT:     CNDE T2.X, PV.W, PV.Z, literal.x,
-; CM-NEXT:     CNDE T1.Y, PV.Y, PV.X, 0.0,
+; CM-NEXT:     CNDE T0.Y, PV.Y, PV.X, 0.0,
 ; CM-NEXT:     SETGT T0.Z, KC0[3].Z, literal.y,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
 ; CM-NEXT:    2139095040(INF), 1118925336(8.872284e+01)
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T0.X, PV.W, literal.x,
-; CM-NEXT:     CNDE T1.Y, PV.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE T0.Z, T1.X, T0.Y, 0.0,
+; CM-NEXT:     LSHR T3.X, PV.W, literal.x,
+; CM-NEXT:     CNDE T0.Y, PV.Z, PV.Y, literal.y,
+; CM-NEXT:     CNDE T0.Z, T1.X, T0.X, 0.0,
 ; CM-NEXT:     SETGT * T0.W, KC0[3].Y, literal.z,
 ; CM-NEXT:    2(2.802597e-45), 2139095040(INF)
 ; CM-NEXT:    1118925336(8.872284e+01), 0(0.000000e+00)
-; CM-NEXT:     CNDE * T1.X, PV.W, PV.Z, literal.x,
+; CM-NEXT:     CNDE * T0.X, PV.W, PV.Z, literal.x,
 ; CM-NEXT:    2139095040(INF), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in)
   store <3 x float> %result, ptr addrspace(1) %out
@@ -2050,227 +2041,224 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; R600-LABEL: s_exp_v4f32:
 ; R600:       ; %bb.0:
 ; R600-NEXT:    ALU 98, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 98, @105, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 24, @204, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 95, @105, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 24, @201, KC0[CB0:0-32], KC1[]
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
 ; R600-NEXT:    CF_END
 ; R600-NEXT:    PAD
 ; R600-NEXT:    ALU clause starting at 6:
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].Z, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.W, KC0[3].Z, -PV.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT:     ADD * T1.W, KC0[3].Z, -PV.W,
+; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; R600-NEXT:     RNDNE T4.W, PS,
+; R600-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122
 ; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T3.W, PS,
-; R600-NEXT:     MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PS,
+; R600-NEXT:     ADD * T1.W, T3.W, -PV.W,
 ; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT:     TRUNC * T4.W, PV.W,
-; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T0.Z, PS,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.Z, PS, PV.W,
-; R600-NEXT:     MAX_INT T0.W, PV.Z, literal.x,
-; R600-NEXT:     MIN_INT * T1.W, PV.Z, literal.y,
-; R600-NEXT:    -330(nan), 381(5.338947e-43)
-; R600-NEXT:     ADD_INT T0.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T0.Y, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT T0.W, T0.Z, literal.w,
-; R600-NEXT:     EXP_IEEE * T1.X, PV.Z,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
-; R600-NEXT:     SETGT_UINT T1.Y, T0.Z, literal.y,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T1.W, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.z,
-; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.y,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, T0.X,
-; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     AND_INT T2.Y, KC0[4].X, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.X, literal.y,
-; R600-NEXT:    -4096(nan), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T0.X, T1.Y, T3.X, PS,
-; R600-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T1.X,
-; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
-; R600-NEXT:     ADD T0.W, KC0[4].X, -PV.Y,
-; R600-NEXT:     MUL_IEEE * T1.W, PV.Y, literal.y,
-; R600-NEXT:    23(3.222986e-44), 1069064192(1.442383e+00)
-; R600-NEXT:     RNDNE T1.Y, PS,
-; R600-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T2.W, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
-; R600-NEXT:    967029397(3.122284e-04), 1065353216(1.000000e+00)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, PV.W,
-; R600-NEXT:     AND_INT T0.Z, KC0[3].W, literal.x,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
-; R600-NEXT:     TRUNC * T2.W, PV.Y,
-; R600-NEXT:    -4096(nan), 1069064192(1.442383e+00)
-; R600-NEXT:     SETGT T0.X, literal.x, KC0[3].Z,
-; R600-NEXT:     FLT_TO_INT T3.Y, PS,
-; R600-NEXT:     MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
-; R600-NEXT:     ADD T0.W, T1.W, -T1.Y,
-; R600-NEXT:     MUL_IEEE * T1.W, PV.Z, literal.z,
-; R600-NEXT:    -1026650416(-1.032789e+02), 967029397(3.122284e-04)
-; R600-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T1.X, PS,
-; R600-NEXT:     AND_INT T1.Y, KC0[3].Y, literal.x,
-; R600-NEXT:     ADD T1.Z, PV.W, PV.Z,
-; R600-NEXT:     MAX_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     MIN_INT * T2.W, PV.Y, literal.z,
-; R600-NEXT:    -4096(nan), -330(nan)
+; R600-NEXT:     ADD T0.W, PS, PV.W,
+; R600-NEXT:     TRUNC * T1.W, T4.W,
+; R600-NEXT:     FLT_TO_INT T1.W, PS,
+; R600-NEXT:     EXP_IEEE * T0.X, PV.W,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     MAX_INT T0.W, PV.W, literal.y,
+; R600-NEXT:     MIN_INT * T2.W, PV.W, literal.z,
+; R600-NEXT:    209715200(1.972152e-31), -330(nan)
 ; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T2.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T3.Y, literal.z,
-; R600-NEXT:     SETGT_UINT T0.W, T3.Y, literal.w,
-; R600-NEXT:     EXP_IEEE * T1.Z, PV.Z,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T3.X, T3.Y, literal.x,
-; R600-NEXT:     SETGT_UINT T4.Y, T3.Y, literal.y,
-; R600-NEXT:     CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T2.W, T3.Y, literal.x,
-; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.z,
+; R600-NEXT:     ADD_INT T1.X, PS, literal.x,
+; R600-NEXT:     AND_INT T0.Y, KC0[4].X, literal.y,
+; R600-NEXT:     ADD_INT T1.Z, PV.W, literal.z,
+; R600-NEXT:     ADD_INT * T0.W, T1.W, literal.w,
+; R600-NEXT:    -254(nan), -4096(nan)
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:     SETGT_UINT * T2.W, T1.W, literal.x,
+; R600-NEXT:    -229(nan), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT T2.X, T1.W, literal.x,
+; R600-NEXT:     SETGT_UINT T1.Y, T1.W, literal.y,
+; R600-NEXT:     CNDE_INT T1.Z, PV.W, T1.Z, T0.W,
+; R600-NEXT:     SETGT_INT T0.W, T1.W, literal.x,
+; R600-NEXT:     ADD * T3.W, KC0[4].X, -T0.Y,
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T4.X, T1.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
-; R600-NEXT:     CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
-; R600-NEXT:     CNDE_INT T4.W, PV.Y, PV.X, T2.X,
-; R600-NEXT:     SETGT_INT * T5.W, T3.Y, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T0.Y, literal.y,
+; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T1.W,
+; R600-NEXT:     CNDE_INT T4.W, PV.Y, PV.X, T1.X,
+; R600-NEXT:     SETGT_INT * T1.W, T1.W, literal.z,
+; R600-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
 ; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     ADD T2.X, KC0[3].W, -T0.Z,
-; R600-NEXT:     CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
-; R600-NEXT:    ALU clause starting at 105:
-; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.x,
-; R600-NEXT:     ADD * T3.W, KC0[3].Y, -T1.Y,
+; R600-NEXT:     CNDE_INT T1.X, PS, PV.Z, PV.W,
+; R600-NEXT:     RNDNE T3.Y, PV.Y,
+; R600-NEXT:     MULADD_IEEE T1.Z, T3.W, literal.x, PV.X,
+; R600-NEXT:     MUL_IEEE T3.W, T0.Z, literal.y,
+; R600-NEXT:     MUL_IEEE * T4.W, T0.X, literal.z,
+; R600-NEXT:    1069064192(1.442383e+00), 209715200(1.972152e-31)
 ; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T4.Y, T2.W, PV.W, T0.Z,
+; R600-NEXT:     MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z,
+; R600-NEXT:     ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT:     AND_INT * T3.W, KC0[3].Y, literal.z,
+; R600-NEXT:    2130706432(1.701412e+38), 967029397(3.122284e-04)
+; R600-NEXT:    -4096(nan), 0(0.000000e+00)
 ; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, T1.Y, literal.y,
-; R600-NEXT:     CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
-; R600-NEXT:     LSHL * T2.W, T3.Y, literal.z,
-; R600-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
-; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T4.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
-; R600-NEXT:     RNDNE T1.Z, PV.Y,
-; R600-NEXT:     MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT:     MUL_IEEE * T2.W, T2.X, literal.z,
+; R600-NEXT:     ADD T0.Y, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.W, T1.Y, T4.W, PV.X,
+; R600-NEXT:     LSHL * T2.W, T1.X, literal.y,
+; R600-NEXT:    1069064192(1.442383e+00), 23(3.222986e-44)
+; R600-NEXT:     AND_INT T0.X, KC0[3].W, literal.x,
+; R600-NEXT:     TRUNC T1.Y, T3.Y,
+; R600-NEXT:     ADD_INT T1.Z, PS, literal.y,
+; R600-NEXT:     CNDE_INT T0.W, T1.W, PV.Z, PV.W,
+; R600-NEXT:     EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT:    -4096(nan), 1065353216(1.000000e+00)
+; R600-NEXT:     MUL_IEEE T1.X, PV.W, PV.Z,
+; R600-NEXT:     FLT_TO_INT T1.Y, PV.Y,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     ADD T0.W, KC0[3].W, -PV.X,
+; R600-NEXT:     RNDNE * T1.W, T3.X,
+; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT:     SETGT T2.X, literal.x, KC0[3].Z,
+; R600-NEXT:     TRUNC T2.Y, PS,
+; R600-NEXT:     MUL_IEEE T1.Z, PV.W, literal.y,
+; R600-NEXT:     MUL_IEEE T2.W, PV.Z, literal.z,
+; R600-NEXT:     MAX_INT * T4.W, PV.Y, literal.w,
+; R600-NEXT:    -1026650416(-1.032789e+02), 967029397(3.122284e-04)
+; R600-NEXT:    209715200(1.972152e-31), -330(nan)
+; R600-NEXT:     ADD T4.X, KC0[3].Y, -T3.W,
+; R600-NEXT:     ADD_INT T3.Y, PS, literal.x,
+; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T4.W, T1.Y, literal.z,
+; R600-NEXT:     MIN_INT * T5.W, T1.Y, literal.w,
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:    -229(nan), 381(5.338947e-43)
+; R600-NEXT:     ADD_INT T5.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T4.Y, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T3.Z, T1.Y, literal.z,
+; R600-NEXT:     CNDE_INT T5.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT:     SETGT_INT * T6.W, T1.Y, literal.y,
+; R600-NEXT:    -254(nan), -127(nan)
+; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T6.X, T0.Y, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, PS, PV.W, T1.Y,
+; R600-NEXT:     CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X,
+; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:    ALU clause starting at 105:
+; R600-NEXT:     SETGT_INT T5.W, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE * T7.W, T4.X, literal.y,
+; R600-NEXT:    127(1.779649e-43), 967029397(3.122284e-04)
+; R600-NEXT:     MUL_IEEE T5.X, T0.X, literal.x,
+; R600-NEXT:     MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T2.Z, PV.W, T3.Y, T2.Z,
+; R600-NEXT:     MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201
+; R600-NEXT:     CNDE_INT * T2.W, T4.W, T2.W, T0.Z,
+; R600-NEXT:    1069064192(1.442383e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T4.X, T6.W, PS, T0.Y,
+; R600-NEXT:     CNDE_INT T0.Y, T3.Z, T6.X, PV.W,
+; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT:     MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201
+; R600-NEXT:     ADD * T1.W, T3.X, -T1.W,
+; R600-NEXT:    23(3.222986e-44), 967029397(3.122284e-04)
+; R600-NEXT:     ADD T3.X, PS, PV.W,
+; R600-NEXT:     ADD_INT T1.Y, PV.Z, literal.x,
+; R600-NEXT:     CNDE_INT T0.Z, T5.W, PV.X, PV.Y,
+; R600-NEXT:     RNDNE T1.W, T5.X,
+; R600-NEXT:     MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122
 ; R600-NEXT:    1065353216(1.000000e+00), 1069064192(1.442383e+00)
-; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T2.X, T2.X, literal.x, PS,
-; R600-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
-; R600-NEXT:     ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT:     MUL_IEEE T0.W, PV.Y, PV.X,
-; R600-NEXT:     SETGT * T2.W, literal.z, KC0[4].X,
-; R600-NEXT:    1069064192(1.442383e+00), 967029397(3.122284e-04)
-; R600-NEXT:    -1026650416(-1.032789e+02), 0(0.000000e+00)
-; R600-NEXT:     CNDE T3.X, PS, PV.W, 0.0,
-; R600-NEXT:     ADD T1.Y, PV.Z, PV.Y,
-; R600-NEXT:     TRUNC T1.Z, T1.Z,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD * T1.W, T1.W, -T1.X,
-; R600-NEXT:    967029397(3.122284e-04), 0(0.000000e+00)
-; R600-NEXT:     SETGT T2.X, KC0[4].X, literal.x,
-; R600-NEXT:     ADD T2.Y, PS, PV.W,
-; R600-NEXT:     FLT_TO_INT T0.Z, PV.Z,
-; R600-NEXT:     TRUNC T0.W, T1.X,
-; R600-NEXT:     EXP_IEEE * T1.X, PV.Y,
-; R600-NEXT:    1118925336(8.872284e+01), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT:     FLT_TO_INT T1.Y, PV.W,
-; R600-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     MUL_IEEE T0.W, PS, literal.z,
-; R600-NEXT:     EXP_IEEE * T1.W, PV.Y,
-; R600-NEXT:    2130706432(1.701412e+38), -330(nan)
+; R600-NEXT:     MULADD_IEEE T0.X, T0.X, literal.x, PS,
+; R600-NEXT:     ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE T0.Z, PV.Z, PV.Y,
+; R600-NEXT:     SETGT T0.W, literal.y, KC0[4].X,
+; R600-NEXT:     EXP_IEEE * T1.Y, PV.X,
+; R600-NEXT:    967029397(3.122284e-04), -1026650416(-1.032789e+02)
+; R600-NEXT:     CNDE T3.X, PV.W, PV.Z, 0.0,
+; R600-NEXT:     ADD T0.Y, PV.Y, PV.X,
+; R600-NEXT:     FLT_TO_INT T0.Z, T2.Y,
+; R600-NEXT:     TRUNC T0.W, T1.W,
+; R600-NEXT:     MUL_IEEE * T1.W, PS, literal.x,
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T5.X, PV.W, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T2.W, T0.Z, literal.z,
-; R600-NEXT:     MAX_INT * T3.W, PV.Y, literal.w,
-; R600-NEXT:    209715200(1.972152e-31), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -330(nan)
-; R600-NEXT:     SETGT_UINT T6.X, T0.Z, literal.x,
-; R600-NEXT:     ADD_INT T3.Y, PS, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.z,
-; R600-NEXT:     SETGT_UINT T3.W, T1.Y, literal.x,
-; R600-NEXT:     MIN_INT * T4.W, T1.Y, literal.w,
+; R600-NEXT:     SETGT T0.X, KC0[4].X, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT:     FLT_TO_INT T1.Z, PV.W,
+; R600-NEXT:     MAX_INT T0.W, PV.Z, literal.z,
+; R600-NEXT:     EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT:    1118925336(8.872284e+01), 209715200(1.972152e-31)
+; R600-NEXT:    -330(nan), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T4.X, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE T3.Y, PS, literal.y,
+; R600-NEXT:     ADD_INT T2.Z, PV.W, literal.z,
+; R600-NEXT:     ADD_INT * T0.W, T0.Z, literal.w,
+; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:     MAX_INT * T2.W, T1.Z, literal.x,
+; R600-NEXT:    -330(nan), 0(0.000000e+00)
+; R600-NEXT:     SETGT_UINT T5.X, T0.Z, literal.x,
+; R600-NEXT:     ADD_INT T4.Y, PV.W, literal.y,
+; R600-NEXT:     ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212
+; R600-NEXT:     SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     MIN_INT * T3.W, T1.Z, literal.w,
 ; R600-NEXT:    -229(nan), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), 381(5.338947e-43)
-; R600-NEXT:     ADD_INT T7.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T4.Y, T1.Y, literal.y,
-; R600-NEXT:     SETGT_UINT T3.Z, T1.Y, literal.z,
-; R600-NEXT:     CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT * T5.W, T1.Y, literal.y,
+; R600-NEXT:     ADD_INT T6.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T5.Y, T1.Z, literal.y,
+; R600-NEXT:     SETGT_UINT T4.Z, T1.Z, literal.z,
+; R600-NEXT:     CNDE_INT T3.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT:     SETGT_INT * T4.W, T1.Z, literal.y,
 ; R600-NEXT:    -254(nan), -127(nan)
 ; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T8.X, PS, PV.W, T1.Y,
-; R600-NEXT:     CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT:     SETGT_INT T2.Z, T1.Y, literal.x,
-; R600-NEXT:     CNDE_INT T2.W, T6.X, T1.Z, T2.W,
-; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.y,
+; R600-NEXT:     CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T4.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT:     SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221
+; R600-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
 ; R600-NEXT:    127(1.779649e-43), -127(nan)
-; R600-NEXT:     CNDE_INT T7.X, PS, PV.W, T0.Z,
-; R600-NEXT:     CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
-; R600-NEXT:     MIN_INT T1.Z, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T2.W, T1.W, literal.y,
-; R600-NEXT:     MUL_IEEE * T6.W, T2.Y, literal.z,
-; R600-NEXT:    381(5.338947e-43), 2130706432(1.701412e+38)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T8.X, T3.W, PS, T2.Y,
-; R600-NEXT:     MUL_IEEE T2.Y, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT * T6.W, T0.Z, literal.w,
+; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T0.Z,
+; R600-NEXT:     CNDE_INT T4.Y, PV.Z, PV.X, PV.Y,
+; R600-NEXT:     MIN_INT T2.Z, T0.Z, literal.x,
+; R600-NEXT:     MUL_IEEE T0.W, T3.Y, literal.y,
+; R600-NEXT:     MUL_IEEE * T5.W, T0.Y, literal.z,
+; R600-NEXT:    381(5.338947e-43), 209715200(1.972152e-31)
+; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T7.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, T2.W, PV.W, T3.Y,
+; R600-NEXT:     ADD_INT T2.Z, PV.Z, literal.y,
+; R600-NEXT:     ADD_INT T0.W, T0.Z, literal.z,
+; R600-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; R600-NEXT:    2130706432(1.701412e+38), -254(nan)
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T9.X, PS, PV.W, PV.Z,
-; R600-NEXT:     SETGT_INT T3.Y, T0.Z, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
-; R600-NEXT:     LSHL * T2.W, T1.Y, literal.y,
+; R600-NEXT:     CNDE_INT T8.X, PS, PV.W, PV.Z,
+; R600-NEXT:     SETGT_INT T5.Y, T0.Z, literal.x,
+; R600-NEXT:     CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT:     LSHL * T4.W, T4.Y, literal.y,
 ; R600-NEXT:    127(1.779649e-43), 23(3.222986e-44)
-; R600-NEXT:     ADD_INT T8.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
-; R600-NEXT:     CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
-; R600-NEXT:     CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
-; R600-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE * T1.W, T4.X, literal.x,
-; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T4.X, T6.W, T4.X, PV.W,
-; R600-NEXT:     CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
-; R600-NEXT:    ALU clause starting at 204:
+; R600-NEXT:     ADD_INT T7.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T0.Y, T1.Z, PV.Z, PV.W,
+; R600-NEXT:     CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
+; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T5.X, T2.Y, T1.W,
+; R600-NEXT:    1065353216(1.000000e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T5.X, T3.W, PS, T1.Y,
+; R600-NEXT:     CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT:    ALU clause starting at 201:
 ; R600-NEXT:     LSHL T0.Z, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T0.W, T1.Y, T8.X,
+; R600-NEXT:     MUL_IEEE T0.W, T0.Y, T7.X,
 ; R600-NEXT:     SETGT * T1.W, literal.y, KC0[3].W,
 ; R600-NEXT:    23(3.222986e-44), -1026650416(-1.032789e+02)
-; R600-NEXT:     CNDE T1.X, PS, PV.W, 0.0,
-; R600-NEXT:     SETGT T1.Y, KC0[3].W, literal.x,
+; R600-NEXT:     CNDE T4.X, PS, PV.W, 0.0,
+; R600-NEXT:     SETGT T0.Y, KC0[3].W, literal.x,
 ; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE * T1.W, T2.X, T3.X, literal.z,
+; R600-NEXT:     CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221
+; R600-NEXT:     CNDE * T1.W, T0.X, T3.X, literal.z,
 ; R600-NEXT:    1118925336(8.872284e+01), 1065353216(1.000000e+00)
 ; R600-NEXT:    2139095040(INF), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T2.X, PV.W, PV.Z,
+; R600-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
 ; R600-NEXT:     SETGT T2.Y, literal.x, KC0[3].Y,
 ; R600-NEXT:     CNDE T1.Z, PV.Y, PV.X, literal.y,
-; R600-NEXT:     CNDE T0.W, T0.X, T0.Y, 0.0,
+; R600-NEXT:     CNDE T0.W, T2.X, T1.X, 0.0,
 ; R600-NEXT:     SETGT * T2.W, KC0[3].Z, literal.z,
 ; R600-NEXT:    -1026650416(-1.032789e+02), 2139095040(INF)
 ; R600-NEXT:    1118925336(8.872284e+01), 0(0.000000e+00)
@@ -2285,8 +2273,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; CM-LABEL: s_exp_v4f32:
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 97, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 100, @104, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 36, @205, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 97, @104, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 35, @202, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
@@ -2305,224 +2293,220 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
 ; CM-NEXT:     MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
 ; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT:     MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.x,
 ; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
 ; CM-NEXT:     TRUNC T1.X, T1.Z,
-; CM-NEXT:     RNDNE T2.Y, PV.W,
-; CM-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     ADD * T0.W, PV.Y, PV.X,
 ; CM-NEXT:    1069064192(1.442383e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     TRUNC T2.X, T1.Z,
+; CM-NEXT:     MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y,
+; CM-NEXT:     FLT_TO_INT T2.Z, T1.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    967029397(3.122284e-04), 209715200(1.972152e-31)
+; CM-NEXT:     ADD T1.X, T0.Z, -T1.Z,
+; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T0.Z, T2.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T2.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
+; CM-NEXT:    102(1.429324e-43), -229(nan)
+; CM-NEXT:     ADD_INT T4.X, T2.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, T2.Z, literal.y,
+; CM-NEXT:     CNDE_INT T0.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T2.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T5.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Z, T2.Z,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, PV.X, T3.X,
+; CM-NEXT:     SETGT_INT * T3.W, T2.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
+; CM-NEXT:     AND_INT T3.X, KC0[3].Z, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T1.Y, T0.W,
+; CM-NEXT:    -4096(nan), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T1.Y, T3.Y, T5.X, PV.Z,
+; CM-NEXT:     LSHL T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    23(3.222986e-44), 1069064192(1.442383e+00)
+; CM-NEXT:     RNDNE T4.X, PV.W,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, T3.W, PV.X, PV.Y,
+; CM-NEXT:     ADD * T1.W, T1.X, T0.Y,
+; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; CM-NEXT:     EXP_IEEE T0.X, T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
-; CM-NEXT:     ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
-; CM-NEXT:     FLT_TO_INT T0.Z, T1.X,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
-; CM-NEXT:    967029397(3.122284e-04), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE T1.X, T0.Z, T2.Y,
+; CM-NEXT:     TRUNC T0.Y, T4.X,
+; CM-NEXT:     FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE * T1.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T2.X, PV.W, literal.x,
 ; CM-NEXT:     MUL_IEEE T1.Y, T0.X, literal.y,
 ; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.z,
-; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.w,
+; CM-NEXT:     MIN_INT * T2.W, PV.Z, literal.w,
 ; CM-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; CM-NEXT:    -330(nan), 381(5.338947e-43)
-; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T3.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
 ; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     ADD_INT T4.X, T0.Z, literal.x,
-; CM-NEXT:     SETGT_UINT T4.Y, T0.Z, literal.y,
+; CM-NEXT:     ADD_INT T6.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, T0.Z, literal.y,
 ; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.x,
 ; CM-NEXT:    -127(nan), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT:     CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
-; CM-NEXT:     SETGT_INT T0.Z, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE * T3.W, T1.Y, literal.y,
-; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
-; CM-NEXT:     CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
-; CM-NEXT:     AND_INT T1.Y, KC0[3].Z, literal.x,
-; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.W, T1.X, T0.W,
-; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
-; CM-NEXT:     LSHL T3.Y, PV.Z, literal.x,
-; CM-NEXT:     TRUNC T1.Z, T2.Y,
-; CM-NEXT:     ADD * T0.W, KC0[3].Z, -PV.Y,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
-; CM-NEXT:     FLT_TO_INT T2.Y, PV.Z,
-; CM-NEXT:     ADD_INT T1.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
-; CM-NEXT:    967029397(3.122284e-04), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT:     MIN_INT T3.Y, PV.Y, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
-; CM-NEXT:     ADD * T0.W, T0.Y, T2.X,
-; CM-NEXT:    381(5.338947e-43), 1069064192(1.442383e+00)
-; CM-NEXT:     EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE T0.Y, T0.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT:     MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
-; CM-NEXT:     MUL_IEEE T4.Y, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:     MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
-; CM-NEXT:    967029397(3.122284e-04), 2130706432(1.701412e+38)
-; CM-NEXT:    -254(nan), -330(nan)
-; CM-NEXT:     ADD_INT T2.X, T2.Y, literal.x,
-; CM-NEXT:     ADD_INT T3.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T2.Y, literal.z,
-; CM-NEXT:     SETGT_UINT * T0.W, T2.Y, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
-; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T3.X, T2.Y, literal.x,
-; CM-NEXT:     CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T2.Y, literal.y,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T4.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 104:
-; CM-NEXT:     CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T2.W, T2.Y, literal.x,
+; CM-NEXT:     CNDE_INT T7.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T2.Y, PV.Y, PV.X, T5.X,
+; CM-NEXT:     SETGT_INT * T0.Z, T0.Z, literal.x,
 ; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T2.X, T1.Y, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T0.W, T4.X, T1.W,
-; CM-NEXT:     MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
-; CM-NEXT:    1069064192(1.442383e+00), 2130706432(1.701412e+38)
-; CM-NEXT:     AND_INT T4.X, KC0[4].X, literal.x,
-; CM-NEXT:     CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
-; CM-NEXT:     LSHL * T0.W, PV.Y, literal.y,
-; CM-NEXT:    -4096(nan), 23(3.222986e-44)
-; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.y,
-; CM-NEXT:     RNDNE * T0.W, T2.X,
-; CM-NEXT:    1065353216(1.000000e+00), 1069064192(1.442383e+00)
-; CM-NEXT:     ADD T2.X, T2.X, -PV.W,
-; CM-NEXT:     RNDNE T1.Y, PV.Z,
-; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, PV.X,
-; CM-NEXT:     SETGT * T1.W, literal.x, KC0[3].W,
-; CM-NEXT:    -1026650416(-1.032789e+02), 0(0.000000e+00)
-; CM-NEXT:     CNDE T3.X, PV.W, PV.Z, 0.0,
-; CM-NEXT:     TRUNC T0.Y, T0.W,
-; CM-NEXT:     TRUNC T1.Z, PV.Y,
-; CM-NEXT:     ADD * T0.W, PV.X, T1.X,
+; CM-NEXT:    ALU clause starting at 104:
+; CM-NEXT:     ADD * T4.W, KC0[3].Z, -T3.X,
+; CM-NEXT:     MUL_IEEE T5.X, PV.W, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, T0.Z, T7.X, T2.Y,
+; CM-NEXT:     MUL_IEEE T1.Z, T1.Y, literal.y,
+; CM-NEXT:     CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122
+; CM-NEXT:    967029397(3.122284e-04), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T3.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z,
+; CM-NEXT:     LSHL T1.Z, PV.Y, literal.x,
+; CM-NEXT:     MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212
+; CM-NEXT:    23(3.222986e-44), 1069064192(1.442383e+00)
+; CM-NEXT:     MULADD_IEEE T2.X, T3.X, literal.x, PV.W,
+; CM-NEXT:     ADD T2.Y, T0.W, -T4.X,
+; CM-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T0.Z, PV.X, PV.Y,
+; CM-NEXT:    967029397(3.122284e-04), 1065353216(1.000000e+00)
+; CM-NEXT:     AND_INT T0.X, KC0[4].X, literal.x,
+; CM-NEXT:     MUL_IEEE T1.Y, PV.W, PV.Z,
+; CM-NEXT:     SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT:     ADD * T0.W, PV.Y, PV.X,
+; CM-NEXT:    -4096(nan), -1026650416(-1.032789e+02)
 ; CM-NEXT:     EXP_IEEE T0.X (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE * T0.W, T0.W,
-; CM-NEXT:     FLT_TO_INT T1.X, T1.Z,
-; CM-NEXT:     FLT_TO_INT T0.Y, T0.Y,
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, KC0[4].X, -T4.X,
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T2.X, PV.W, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE T2.Z, PV.Z, literal.z,
-; CM-NEXT:     SETGT_UINT * T2.W, PV.Y, literal.w,
-; CM-NEXT:    967029397(3.122284e-04), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
-; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
-; CM-NEXT:     MAX_INT * T1.W, T1.X, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 1069064192(1.442383e+00)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T4.Y, T1.X, literal.y,
-; CM-NEXT:     MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
-; CM-NEXT:     MAX_INT * T1.W, T0.Y, literal.w,
-; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:     CNDE T2.X, T0.Z, T1.Y, 0.0,
+; CM-NEXT:     ADD T1.Y, KC0[4].X, -T0.X,
+; CM-NEXT:     FLT_TO_INT T0.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE * T1.W, PV.W, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T0.Y, PV.Z, literal.y,
+; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, literal.z,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.w,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
+; CM-NEXT:    967029397(3.122284e-04), 1069064192(1.442383e+00)
+; CM-NEXT:     RNDNE T4.X, PV.W,
+; CM-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.W,
+; CM-NEXT:     SETGT_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT:    1069064192(1.442383e+00), -127(nan)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, T0.W,
+; CM-NEXT:     MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y,
+; CM-NEXT:     ADD T1.Z, T2.W, -PV.X,
+; CM-NEXT:     MAX_INT * T2.W, T0.Z, literal.y,
 ; CM-NEXT:    967029397(3.122284e-04), -330(nan)
-; CM-NEXT:     ADD T4.X, T0.Z, -T1.Y,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.Y, literal.z,
+; CM-NEXT:     ADD_INT T0.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     TRUNC T2.Z, T4.X,
+; CM-NEXT:     ADD * T2.W, PV.Z, PV.Y,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T6.X, T1.X, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT:     ADD * T3.W, PV.X, T1.Z,
-; CM-NEXT:    -229(nan), -127(nan)
-; CM-NEXT:     EXP_IEEE T1.X (MASKED), T3.W,
-; CM-NEXT:     EXP_IEEE T1.Y (MASKED), T3.W,
-; CM-NEXT:     EXP_IEEE T1.Z, T3.W,
-; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T3.W,
-; CM-NEXT:     CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
-; CM-NEXT:     SETGT_INT T2.Z, T1.X, literal.x,
-; CM-NEXT:     MUL_IEEE * T3.W, PV.Z, literal.y,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T2.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T4.Y, PV.W, literal.y,
-; CM-NEXT:     CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
-; CM-NEXT:     MIN_INT * T4.W, T1.X, literal.z,
+; CM-NEXT:     EXP_IEEE T1.X (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE T1.Y, T2.W,
+; CM-NEXT:     EXP_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT:     MUL_IEEE T4.X, T0.W, literal.x,
+; CM-NEXT:     FLT_TO_INT T3.Y, T2.Z,
+; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T0.Y, T0.X, T2.Y,
 ; CM-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; CM-NEXT:     CNDE_INT T0.X, T1.W, PV.W, T0.Z,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.Z, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, PV.Y, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, PV.Y, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T7.X, T0.Y, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T4.Z, T1.X, literal.z,
-; CM-NEXT:     SETGT_UINT * T4.W, T1.X, literal.w,
-; CM-NEXT:    381(5.338947e-43), -254(nan)
+; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T3.Y, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T3.Y, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
+; CM-NEXT:    102(1.429324e-43), -229(nan)
+; CM-NEXT:     ADD_INT T6.X, T3.Y, literal.x,
+; CM-NEXT:     SETGT_UINT T4.Y, T3.Y, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T1.W, T3.Y, literal.x,
 ; CM-NEXT:    -127(nan), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
-; CM-NEXT:     SETGT_INT T1.Y, T1.X, literal.x,
-; CM-NEXT:     ADD_INT T4.Z, PV.X, literal.y,
-; CM-NEXT:     ADD_INT * T5.W, T0.Y, literal.z,
+; CM-NEXT:     MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Z, T3.Y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Y, PV.X, T5.X,
+; CM-NEXT:     MIN_INT * T2.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T5.X, T3.Y, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T3.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T1.X, T2.W, PV.W, PV.Z,
-; CM-NEXT:     CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
-; CM-NEXT:     CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
-; CM-NEXT:     MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     CNDE_INT T2.Y, PV.X, T2.Y, T2.Z,
+; CM-NEXT:     MUL_IEEE T2.Z, T7.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T6.X, T0.Y, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T4.W, T2.X, PV.W,
-; CM-NEXT:     CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 205:
-; CM-NEXT:     LSHL * T2.W, T5.Y, literal.x,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
-; CM-NEXT:     CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
+; CM-NEXT:     SETGT_INT T8.X, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T1.Y,
+; CM-NEXT:     CNDE_INT T0.Z, T4.Y, T7.X, PV.Z,
+; CM-NEXT:     LSHL * T0.W, PV.Y, literal.y,
+; CM-NEXT:    127(1.779649e-43), 23(3.222986e-44)
+; CM-NEXT:    ALU clause starting at 202:
+; CM-NEXT:     ADD_INT T7.X, T0.W, literal.x,
+; CM-NEXT:     CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z,
 ; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
-; CM-NEXT:     CNDE_INT T1.X, T0.Z, PV.W, T0.W,
-; CM-NEXT:     LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE T0.Z, T0.Y, T2.X,
+; CM-NEXT:     CNDE_INT * T0.Z, T8.X, T0.X, T6.X,
+; CM-NEXT:     MUL_IEEE * T0.W, T4.X, literal.x,
+; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT:     CNDE_INT T0.X, T2.W, T4.X, PV.W,
+; CM-NEXT:     LSHL T1.Y, T0.Z, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122
 ; CM-NEXT:     SETGT * T0.W, literal.y, KC0[4].X,
 ; CM-NEXT:    23(3.222986e-44), -1026650416(-1.032789e+02)
-; CM-NEXT:     CNDE T2.X, PV.W, PV.Z, 0.0,
+; CM-NEXT:     CNDE T4.X, PV.W, PV.Z, 0.0,
 ; CM-NEXT:     SETGT T0.Y, KC0[4].X, literal.x,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T6.X, PV.X, T5.X,
+; CM-NEXT:     CNDE_INT * T0.W, T8.X, T3.X, PV.X,
 ; CM-NEXT:    1118925336(8.872284e+01), 1065353216(1.000000e+00)
-; CM-NEXT:     SETGT T1.X, KC0[3].W, literal.x,
+; CM-NEXT:     SETGT T0.X, KC0[3].W, literal.x,
 ; CM-NEXT:     MUL_IEEE T1.Y, PV.W, PV.Z,
 ; CM-NEXT:     SETGT T0.Z, literal.y, KC0[3].Z,
 ; CM-NEXT:     CNDE * T0.W, PV.Y, PV.X, literal.z,
 ; CM-NEXT:    1118925336(8.872284e+01), -1026650416(-1.032789e+02)
 ; CM-NEXT:    2139095040(INF), 0(0.000000e+00)
-; CM-NEXT:     SETGT T2.X, literal.x, KC0[3].Y,
+; CM-NEXT:     SETGT T3.X, literal.x, KC0[3].Y,
 ; CM-NEXT:     CNDE T0.Y, PV.Z, PV.Y, 0.0,
-; CM-NEXT:     CNDE T0.Z, PV.X, T3.X, literal.y,
+; CM-NEXT:     CNDE T0.Z, PV.X, T2.X, literal.y,
 ; CM-NEXT:     SETGT * T1.W, KC0[3].Z, literal.z,
 ; CM-NEXT:    -1026650416(-1.032789e+02), 2139095040(INF)
 ; CM-NEXT:    1118925336(8.872284e+01), 0(0.000000e+00)
 ; CM-NEXT:     CNDE T0.Y, PV.W, PV.Y, literal.x,
-; CM-NEXT:     CNDE T1.Z, PV.X, T0.X, 0.0,
+; CM-NEXT:     CNDE T1.Z, PV.X, T1.X, 0.0,
 ; CM-NEXT:     SETGT * T1.W, KC0[3].Y, literal.y,
 ; CM-NEXT:    2139095040(INF), 1118925336(8.872284e+01)
 ; CM-NEXT:     CNDE * T0.X, PV.W, PV.Z, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 544c1de6c7bb7..a162949587481 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -230,23 +230,23 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.z,
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.y,
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T0.Y, T1.X, literal.y,
 ; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
 ; R600-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, T0.X,
 ; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
 ; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.X, literal.x,
+; R600-NEXT:     MUL_IEEE T3.W, PV.Y, literal.x,
+; R600-NEXT:     CNDE_INT * T0.W, T0.W, PV.X, T2.W,
 ; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T1.Z, T1.Y, T3.X, PS,
-; R600-NEXT:     CNDE_INT T0.W, T1.W, PV.W, T1.X,
+; R600-NEXT:     CNDE_INT T1.Z, T1.W, PS, T1.X,
+; R600-NEXT:     CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
 ; R600-NEXT:     LSHL * T1.W, PV.Z, literal.x,
 ; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
 ; R600-NEXT:     ADD_INT T1.W, PS, literal.x,
-; R600-NEXT:     CNDE_INT * T0.W, T4.W, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
 ; R600-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; R600-NEXT:     MUL_IEEE T0.W, PS, PV.W,
 ; R600-NEXT:     SETGT * T1.W, literal.x, KC0[2].Z,
@@ -260,65 +260,63 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ;
 ; CM-LABEL: s_exp10_f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 64, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 62, @4, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    ALU clause starting at 4:
 ; CM-NEXT:     AND_INT * T0.W, KC0[2].Z, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[2].Z, -PV.W,
-; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T2.Z, PV.W,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT:     RNDNE T1.Z, PV.W,
 ; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
 ; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT:     ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT:     FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
 ; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT:     TRUNC T1.Z, T1.Z,
+; CM-NEXT:     ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT:     MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT:    2130706432(1.701412e+38), -254(nan)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
+; CM-NEXT:     ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
 ; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.Y, T2.X, T0.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Z, T1.Z, PV.Z, T0.X,
-; CM-NEXT:     LSHL * T1.W, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
+; CM-NEXT:     LSHL * T0.W, PV.Y, literal.x,
 ; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
 ; CM-NEXT:     ADD_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT * T0.W, T0.W, PV.Z, PV.Y,
+; CM-NEXT:     CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
 ; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; CM-NEXT:     MUL_IEEE T0.Z, PV.W, PV.Z,
 ; CM-NEXT:     SETGT * T0.W, literal.x, KC0[2].Z,
@@ -612,105 +610,105 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].X, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
 ; R600-NEXT:     ADD * T1.W, KC0[3].X, -PV.W,
-; R600-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.y,
-; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.z,
-; R600-NEXT:    -4096(nan), 975668412(6.390323e-04)
-; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T1.Z, PS,
+; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; R600-NEXT:     RNDNE T0.Z, PS,
 ; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T2.W, KC0[2].W, -PV.Z,
-; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Z, T0.Z, literal.y,
+; R600-NEXT:     AND_INT * T2.W, KC0[2].W, literal.y,
+; R600-NEXT:    1079283712(3.321289e+00), -4096(nan)
+; R600-NEXT:     ADD T1.Z, KC0[2].W, -PS,
 ; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
 ; R600-NEXT:     ADD * T1.W, T3.W, -PV.Z,
+; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
+; R600-NEXT:     ADD T2.Z, PS, PV.W,
+; R600-NEXT:     MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT:     MUL_IEEE * T1.W, T2.W, literal.y,
 ; R600-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
-; R600-NEXT:     ADD T3.Z, PS, PV.W,
-; R600-NEXT:     RNDNE T0.W, PV.Z,
-; R600-NEXT:     MULADD_IEEE * T1.W, T2.W, literal.x, PV.Y, BS:VEC_021/SCL_122
-; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     TRUNC T0.Y, T1.Z,
-; R600-NEXT:     MULADD_IEEE T0.Z, T0.Z, literal.x, PS, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD T1.W, T2.Z, -PV.W, BS:VEC_201
+; R600-NEXT:     RNDNE T0.Y, PS,
+; R600-NEXT:     MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W,
+; R600-NEXT:     TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212
 ; R600-NEXT:     EXP_IEEE * T0.X, PV.Z,
-; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T0.Z, PV.W, PV.Z,
-; R600-NEXT:     FLT_TO_INT T1.W, PV.Y,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.x,
-; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T1.Z, PS, literal.x,
-; R600-NEXT:     SETGT_UINT T3.W, PV.W, literal.y,
-; R600-NEXT:     EXP_IEEE * T0.Y, PV.Z,
-; R600-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T1.X, PV.W, T2.W, PV.Z,
-; R600-NEXT:     MUL_IEEE T1.Y, PS, literal.x,
-; R600-NEXT:     MAX_INT T0.Z, T1.W, literal.y,
-; R600-NEXT:     MIN_INT T2.W, T1.W, literal.z,
-; R600-NEXT:     TRUNC * T0.W, T0.W,
+; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
+; R600-NEXT:     FLT_TO_INT T1.Y, PV.W,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T2.W, literal.y, PV.Z,
+; R600-NEXT:     ADD * T1.W, T1.W, -PV.Y,
+; R600-NEXT:    209715200(1.972152e-31), 975668412(6.390323e-04)
+; R600-NEXT:     ADD T1.Z, PS, PV.W,
+; R600-NEXT:     MUL_IEEE T0.W, PV.Z, literal.x,
+; R600-NEXT:     SETGT_UINT * T1.W, PV.Y, literal.y,
+; R600-NEXT:    209715200(1.972152e-31), -229(nan)
+; R600-NEXT:     CNDE_INT T0.Z, PS, PV.W, T0.Z,
+; R600-NEXT:     SETGT_INT T0.W, T1.Y, literal.x,
+; R600-NEXT:     EXP_IEEE * T1.X, PV.Z,
+; R600-NEXT:    -127(nan), 0(0.000000e+00)
+; R600-NEXT:     CNDE_INT T0.Z, PV.W, PV.Z, T0.X,
+; R600-NEXT:     MAX_INT T2.W, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.y,
+; R600-NEXT:    -330(nan), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
+; R600-NEXT:     ADD_INT T1.Z, T1.Y, literal.z,
+; R600-NEXT:     MIN_INT T2.W, T1.Y, literal.w,
+; R600-NEXT:     TRUNC * T4.W, T0.Y,
+; R600-NEXT:    209715200(1.972152e-31), 204(2.858649e-43)
+; R600-NEXT:    102(1.429324e-43), 381(5.338947e-43)
+; R600-NEXT:     FLT_TO_INT T3.X, PS,
+; R600-NEXT:     ADD_INT T0.Y, PV.W, literal.x,
+; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T2.W, T1.Y, literal.z,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, PV.Y, PV.Z,
+; R600-NEXT:    -254(nan), -127(nan)
+; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T4.X, T1.X, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T1.Z, T0.W, PS, T1.Y,
+; R600-NEXT:     CNDE_INT T0.W, PV.W, PV.Z, PV.Y,
+; R600-NEXT:     MAX_INT * T1.W, PV.X, literal.y,
 ; R600-NEXT:    2130706432(1.701412e+38), -330(nan)
-; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T2.X, PS,
-; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T0.W, T1.W, literal.z,
-; R600-NEXT:     SETGT_UINT * T2.W, T1.W, literal.w,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T3.X, T1.W, literal.x,
-; R600-NEXT:     CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT:     SETGT_INT T0.Z, T1.W, literal.x,
-; R600-NEXT:     MUL_IEEE T0.W, T0.X, literal.y,
-; R600-NEXT:     MUL_IEEE * T4.W, T0.Y, literal.y,
-; R600-NEXT:    -127(nan), 209715200(1.972152e-31)
-; R600-NEXT:     MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T4.Y, PV.W, literal.x,
-; R600-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, T1.W,
-; R600-NEXT:     CNDE_INT T3.W, T3.W, PV.X, T2.Y,
-; R600-NEXT:     MAX_INT * T5.W, T2.X, literal.y,
-; R600-NEXT:    209715200(1.972152e-31), -330(nan)
-; R600-NEXT:     SETGT_INT T3.X, T1.W, literal.x,
-; R600-NEXT:     ADD_INT T2.Y, PS, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T2.X, literal.z,
-; R600-NEXT:     SETGT_UINT * T1.W, T2.X, literal.w,
+; R600-NEXT:     SETGT_INT T0.X, T1.Y, literal.x,
+; R600-NEXT:     ADD_INT T0.Y, PS, literal.y,
+; R600-NEXT:     ADD_INT T2.Z, T3.X, literal.z,
+; R600-NEXT:     SETGT_UINT * T1.W, T3.X, literal.w,
 ; R600-NEXT:    127(1.779649e-43), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     MIN_INT * T5.W, T2.X, literal.x,
+; R600-NEXT:     MIN_INT * T4.W, T3.X, literal.x,
 ; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
 ; R600-NEXT:     ADD_INT T5.X, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
-; R600-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
-; R600-NEXT:     CNDE_INT T5.W, T1.W, T2.Y, T2.Z,
-; R600-NEXT:     SETGT_INT * T6.W, T2.X, literal.y,
+; R600-NEXT:     ADD_INT T1.Y, T3.X, literal.y,
+; R600-NEXT:     SETGT_UINT T3.Z, T3.X, literal.z,
+; R600-NEXT:     CNDE_INT T4.W, T1.W, T0.Y, T2.Z,
+; R600-NEXT:     SETGT_INT * T5.W, T3.X, literal.y,
 ; R600-NEXT:    -254(nan), -127(nan)
 ; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T2.X,
-; R600-NEXT:     CNDE_INT T2.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT:     SETGT_INT T2.Z, T2.X, literal.x, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T3.W, T3.X, T1.Z, T3.W, BS:VEC_021/SCL_122
-; R600-NEXT:     CNDE_INT * T0.W, T2.W, T4.Y, T0.W,
-; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T0.X, T0.Z, PS, T0.X,
-; R600-NEXT:     LSHL T3.Y, PV.W, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, PV.Z, PV.X, PV.Y,
-; R600-NEXT:     CNDE_INT T0.W, T1.W, T4.X, T4.W,
-; R600-NEXT:     MUL_IEEE * T1.W, T1.Y, literal.y,
+; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T3.X,
+; R600-NEXT:     CNDE_INT T0.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT:     SETGT_INT T2.Z, T3.X, literal.x,
+; R600-NEXT:     CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE * T4.W, T2.Y, literal.y,
+; R600-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212
+; R600-NEXT:     LSHL T1.Y, PV.W, literal.x,
+; R600-NEXT:     CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
+; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, T2.X, T3.W,
 ; R600-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T2.X, T3.Z, T1.Y, PS,
-; R600-NEXT:     CNDE_INT T0.Y, T6.W, PV.W, T0.Y,
-; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT:     CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201
+; R600-NEXT:     LSHL T1.Z, PV.Z, literal.x,
 ; R600-NEXT:     ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     CNDE_INT * T1.W, T3.X, PV.X, T1.X,
+; R600-NEXT:     CNDE_INT * T1.W, T0.X, T0.Z, PV.X,
 ; R600-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
 ; R600-NEXT:     MUL_IEEE T1.Y, PS, PV.W,
-; R600-NEXT:     SETGT T1.Z, literal.x, KC0[3].X,
+; R600-NEXT:     SETGT T0.Z, literal.x, KC0[3].X,
 ; R600-NEXT:     ADD_INT * T0.W, PV.Z, literal.y,
 ; R600-NEXT:    -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
 ; R600-NEXT:    ALU clause starting at 101:
-; R600-NEXT:     CNDE_INT * T1.W, T2.Z, T0.Y, T2.X,
+; R600-NEXT:     CNDE_INT * T1.W, T2.Z, T1.X, T0.Y,
 ; R600-NEXT:     MUL_IEEE T0.Y, PV.W, T0.W,
-; R600-NEXT:     SETGT T0.Z, literal.x, KC0[2].W,
-; R600-NEXT:     CNDE T0.W, T1.Z, T1.Y, 0.0,
+; R600-NEXT:     SETGT T1.Z, literal.x, KC0[2].W,
+; R600-NEXT:     CNDE T0.W, T0.Z, T1.Y, 0.0,
 ; R600-NEXT:     SETGT * T1.W, KC0[3].X, literal.y,
 ; R600-NEXT:    -1036817932(-4.485347e+01), 1109008539(3.853184e+01)
 ; R600-NEXT:     CNDE T1.Y, PS, PV.W, literal.x,
@@ -723,118 +721,116 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ;
 ; CM-LABEL: s_exp10_v2f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 100, @4, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 18, @105, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 98, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 18, @103, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    ALU clause starting at 4:
 ; CM-NEXT:     AND_INT * T0.W, KC0[2].W, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[2].W, -PV.W,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT:     AND_INT * T2.W, KC0[3].X, literal.z,
+; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT:    -4096(nan), 0(0.000000e+00)
+; CM-NEXT:     ADD T1.Y, KC0[3].X, -PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y,
 ; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T0.Y, PV.W,
-; CM-NEXT:     AND_INT T2.Z, KC0[3].X, literal.x,
-; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.y, PV.Z,
-; CM-NEXT:    -4096(nan), 1079283712(3.321289e+00)
 ; CM-NEXT:     MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT:     FLT_TO_INT T1.Z, PV.Y,
-; CM-NEXT:     ADD * T0.W, KC0[3].X, -PV.Z,
+; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
 ; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
-; CM-NEXT:     ADD T1.X, T0.Z, -T2.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
-; CM-NEXT:     MAX_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT:     RNDNE * T1.W, PV.Y,
-; CM-NEXT:    975668412(6.390323e-04), -330(nan)
-; CM-NEXT:     TRUNC T2.X, PV.W,
-; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.y, PV.Y,
-; CM-NEXT:     ADD * T0.W, PV.X, T0.X,
-; CM-NEXT:    204(2.858649e-43), 1079283712(3.321289e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T0.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT:     ADD_INT T1.X, T1.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Y, T2.Z, literal.y, T0.Z, BS:VEC_102/SCL_221
-; CM-NEXT:     ADD T0.Z, T1.Y, -T1.W,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT:    102(1.429324e-43), 975668412(6.390323e-04)
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T3.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT:     SETGT_UINT T2.Z, T1.Z, literal.z,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT:    -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     TRUNC T1.X, T1.Z,
+; CM-NEXT:     RNDNE T2.Y, PV.W,
+; CM-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X, T1.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
+; CM-NEXT:     MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
+; CM-NEXT:     ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     TRUNC T1.Z, T2.Y,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
 ; CM-NEXT:     EXP_IEEE T0.X (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Y, T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     CNDE_INT T4.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T3.X, T2.Y, T1.X,
-; CM-NEXT:     FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.x,
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T1.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT:     MUL_IEEE T3.Z, PV.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T2.X, PV.W, T0.W, PV.Z,
+; CM-NEXT:     FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT:     MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Z, T1.Y, T1.X, T0.W,
+; CM-NEXT:     SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    209715200(1.972152e-31), -127(nan)
+; CM-NEXT:     CNDE_INT T1.X, PV.W, PV.Z, T0.X,
 ; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     CNDE_INT T3.Z, PV.X, T1.Y, T1.Z,
-; CM-NEXT:     MAX_INT * T0.W, T0.Z, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), -330(nan)
-; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, T0.Z, literal.y,
-; CM-NEXT:     SETGT_UINT T4.Z, T0.Z, literal.z,
-; CM-NEXT:     MUL_IEEE * T0.W, T0.Y, literal.w,
+; CM-NEXT:     SETGT_UINT T1.Z, PV.X, literal.y,
+; CM-NEXT:     MAX_INT * T1.W, T0.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
+; CM-NEXT:    -330(nan), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T4.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y,
+; CM-NEXT:     SETGT_INT * T1.W, T2.X, literal.z,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T6.X, PV.W, literal.x,
-; CM-NEXT:     MIN_INT T4.Y, T0.Z, literal.y,
-; CM-NEXT:     CNDE_INT T5.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT:     MIN_INT T1.Y, T1.Z, literal.x,
-; CM-NEXT:     ADD_INT T5.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T3.W, T0.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    381(5.338947e-43), -254(nan)
 ; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T7.X, T1.W, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T4.Y, T0.Z, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T1.W, T1.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT T4.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT * T2.W, T1.Y, PV.X, PV.Y,
+; CM-NEXT:    2130706432(1.701412e+38), -330(nan)
+; CM-NEXT:     CNDE_INT T0.X, T0.W, PV.W, T0.Z,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.x,
+; CM-NEXT:     ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT:     CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT:    -254(nan), -127(nan)
+; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T5.X, T0.Y, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T2.X,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT:     MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T8.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T5.X, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T4.Z, T6.X, T0.W, BS:VEC_201
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T5.X, T2.W, PV.W, T0.Y,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     SETGT_INT T1.Y, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.X, T0.Y, T1.Z,
+; CM-NEXT:     MUL_IEEE * T1.W, T5.X, literal.y,
+; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T5.X, T3.Z, T5.X, PV.W,
 ; CM-NEXT:     LSHL T0.Y, PV.Z, literal.x,
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T3.Z, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T3.X, T3.Y, T2.Y, BS:VEC_201
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T1.X, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT:     MUL_IEEE * T1.W, T2.Y, literal.y,
+; CM-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T0.W, T2.Y, PV.W,
 ; CM-NEXT:     LSHL T2.Y, PV.Z, literal.x,
 ; CM-NEXT:     ADD_INT * T0.Z, PV.Y, literal.y,
 ; CM-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
-; CM-NEXT:    ALU clause starting at 105:
-; CM-NEXT:     CNDE_INT * T0.W, T4.Y, T5.X, T2.X,
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, T0.Z,
+; CM-NEXT:    ALU clause starting at 103:
+; CM-NEXT:     CNDE_INT * T0.W, T2.X, T4.X, T5.X,
+; CM-NEXT:     MUL_IEEE T2.X, PV.W, T0.Z,
 ; CM-NEXT:     SETGT T0.Y, literal.x, KC0[3].X,
 ; CM-NEXT:     ADD_INT T0.Z, T2.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.Y, T0.X, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212
 ; CM-NEXT:    -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
 ; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
 ; CM-NEXT:     SETGT T1.Y, literal.x, KC0[2].W,
@@ -1217,8 +1213,8 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ;
 ; R600-LABEL: s_exp10_v3f32:
 ; R600:       ; %bb.0:
-; R600-NEXT:    ALU 100, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 69, @107, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 69, @106, KC0[CB0:0-32], KC1[]
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; R600-NEXT:    CF_END
@@ -1226,69 +1222,68 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; R600-NEXT:    ALU clause starting at 6:
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].Y, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.W, KC0[3].Y, -PV.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE T1.W, PV.W, literal.x,
+; R600-NEXT:     ADD * T2.W, KC0[3].Y, -PV.W,
 ; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T3.W, PS,
-; R600-NEXT:     MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT:     RNDNE * T3.W, PV.W,
+; R600-NEXT:     TRUNC T4.W, PV.W,
+; R600-NEXT:     MUL_IEEE * T5.W, T2.W, literal.x,
 ; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT:     TRUNC * T4.W, PV.W,
+; R600-NEXT:     MULADD_IEEE T2.W, T2.W, literal.x, PS,
+; R600-NEXT:     FLT_TO_INT * T4.W, PV.W,
 ; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T0.Z, PS,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T0.W, PS, PV.W,
-; R600-NEXT:     MAX_INT * T1.W, PV.Z, literal.x,
-; R600-NEXT:    -330(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T0.Y, PS, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, T0.Z, literal.y,
-; R600-NEXT:     SETGT_UINT T1.W, T0.Z, literal.z,
-; R600-NEXT:     EXP_IEEE * T0.X, PV.W,
+; R600-NEXT:     MAX_INT T0.Z, PS, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.y, PV.W,
+; R600-NEXT:     ADD * T1.W, T1.W, -T3.W,
+; R600-NEXT:    -330(nan), 975668412(6.390323e-04)
+; R600-NEXT:     ADD T0.Y, PS, PV.W,
+; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.x,
+; R600-NEXT:     ADD_INT T0.W, T4.W, literal.y,
+; R600-NEXT:     SETGT_UINT * T1.W, T4.W, literal.z,
 ; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
 ; R600-NEXT:    -229(nan), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T0.W, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.y,
-; R600-NEXT:    -127(nan), 209715200(1.972152e-31)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.x,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT:     MIN_INT T3.W, T0.Z, literal.y,
-; R600-NEXT:     AND_INT * T4.W, KC0[3].W, literal.z,
-; R600-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
-; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T1.X, T0.X, literal.x,
-; R600-NEXT:     ADD T1.Y, KC0[3].W, -PS,
-; R600-NEXT:     ADD_INT T2.Z, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT * T5.W, T0.Z, literal.w,
-; R600-NEXT:    2130706432(1.701412e+38), -254(nan)
+; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
+; R600-NEXT:     SETGT_INT T0.W, T4.W, literal.x,
+; R600-NEXT:     EXP_IEEE * T0.X, PV.Y,
+; R600-NEXT:    -127(nan), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T1.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T0.Y, PV.W, PV.Z, T4.W,
+; R600-NEXT:     MIN_INT T0.Z, T4.W, literal.y,
+; R600-NEXT:     AND_INT T2.W, KC0[3].W, literal.z,
+; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.w,
+; R600-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; R600-NEXT:    -4096(nan), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     ADD T1.Y, KC0[3].W, -PV.W,
+; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
+; R600-NEXT:     ADD_INT T5.W, T4.W, literal.z,
+; R600-NEXT:     SETGT_UINT * T6.W, T4.W, literal.w,
+; R600-NEXT:    209715200(1.972152e-31), -254(nan)
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T2.X, PS, PV.W, PV.Z,
-; R600-NEXT:     SETGT_INT T2.Y, T0.Z, literal.x,
+; R600-NEXT:     CNDE_INT T3.X, PS, PV.W, PV.Z,
+; R600-NEXT:     SETGT_INT T2.Y, T4.W, literal.x,
 ; R600-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.y,
-; R600-NEXT:     MUL_IEEE T3.W, T4.W, literal.z,
-; R600-NEXT:     MUL_IEEE * T6.W, PV.X, literal.w,
+; R600-NEXT:     MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212
 ; R600-NEXT:    127(1.779649e-43), 975668412(6.390323e-04)
-; R600-NEXT:    1079283712(3.321289e+00), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T1.X, T5.W, T1.X, PS, BS:VEC_120/SCL_212
-; R600-NEXT:     RNDNE T3.Y, PV.W,
-; R600-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; R600-NEXT:     CNDE_INT T5.W, PV.Y, T1.Z, PV.X,
-; R600-NEXT:     CNDE_INT * T1.W, T1.W, T0.Y, T2.W,
 ; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T0.X, T0.W, PS, T0.X,
+; R600-NEXT:     CNDE_INT * T1.W, T1.W, T2.X, T3.W,
+; R600-NEXT:     CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT:     RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z,
+; R600-NEXT:     CNDE_INT T0.W, T2.Y, T0.Y, T3.X, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE * T1.W, T1.X, literal.y,
+; R600-NEXT:    1079283712(3.321289e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T1.X, T6.W, T1.X, PS,
 ; R600-NEXT:     LSHL T0.Y, PV.W, literal.x,
 ; R600-NEXT:     AND_INT T1.Z, KC0[3].Z, literal.y,
-; R600-NEXT:     MULADD_IEEE T0.W, T4.W, literal.z, PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD * T1.W, T3.W, -PV.Y,
+; R600-NEXT:     MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212
+; R600-NEXT:     ADD * T1.W, T4.W, -PV.Y,
 ; R600-NEXT:    23(3.222986e-44), -4096(nan)
 ; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
 ; R600-NEXT:     ADD T1.Y, PS, PV.W,
 ; R600-NEXT:     MUL_IEEE T0.Z, PV.Z, literal.x,
 ; R600-NEXT:     ADD_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     CNDE_INT * T1.W, T2.Y, PV.X, T1.X,
+; R600-NEXT:     CNDE_INT * T1.W, T2.Y, T0.X, PV.X,
 ; R600-NEXT:    1079283712(3.321289e+00), 1065353216(1.000000e+00)
 ; R600-NEXT:     MUL_IEEE T0.X, PS, PV.W,
 ; R600-NEXT:     ADD T0.Y, KC0[3].Z, -T1.Z,
@@ -1302,12 +1297,12 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; R600-NEXT:     MUL_IEEE * T1.W, PS, literal.z,
 ; R600-NEXT:    -1036817932(-4.485347e+01), 975668412(6.390323e-04)
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T1.X, literal.y,
 ; R600-NEXT:     MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W,
 ; R600-NEXT:     FLT_TO_INT T0.W, PV.Z,
 ; R600-NEXT:     MIN_INT * T2.W, PV.Y, literal.w,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; R600-NEXT:    1079283712(3.321289e+00), 381(5.338947e-43)
 ; R600-NEXT:     ADD_INT T4.X, PS, literal.x,
 ; R600-NEXT:     MAX_INT T0.Y, PV.W, literal.y,
@@ -1325,7 +1320,7 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
 ; R600-NEXT:     ADD_INT * T6.X, T0.W, literal.x,
 ; R600-NEXT:    -127(nan), 0(0.000000e+00)
-; R600-NEXT:    ALU clause starting at 107:
+; R600-NEXT:    ALU clause starting at 106:
 ; R600-NEXT:     SETGT_UINT T0.Y, T0.W, literal.x,
 ; R600-NEXT:     CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221
 ; R600-NEXT:     SETGT_INT T2.W, T0.W, literal.y,
@@ -1341,25 +1336,25 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; R600-NEXT:     SETGT_UINT T5.X, T1.Y, literal.x,
 ; R600-NEXT:     CNDE_INT T4.Y, PS, PV.Z, PV.W,
 ; R600-NEXT:     MAX_INT T0.Z, T1.Y, literal.y,
-; R600-NEXT:     MUL_IEEE T4.W, T1.Z, literal.z,
-; R600-NEXT:     MUL_IEEE * T5.W, PV.Y, literal.w,
+; R600-NEXT:     MUL_IEEE T4.W, PV.Y, literal.z,
+; R600-NEXT:     MUL_IEEE * T5.W, T1.Z, literal.w,
 ; R600-NEXT:    254(3.559298e-43), -330(nan)
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT:     CNDE_INT T6.X, T3.W, PS, T3.Y, BS:VEC_021/SCL_122
-; R600-NEXT:     MUL_IEEE T3.Y, PV.W, literal.x,
+; R600-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
+; R600-NEXT:     MUL_IEEE T6.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122
 ; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
 ; R600-NEXT:     ADD_INT T3.W, T1.Y, literal.z,
-; R600-NEXT:     SETGT_UINT * T5.W, T1.Y, literal.w,
+; R600-NEXT:     SETGT_UINT * T4.W, T1.Y, literal.w,
 ; R600-NEXT:    2130706432(1.701412e+38), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), -229(nan)
 ; R600-NEXT:     CNDE_INT T8.X, PS, PV.Z, PV.W,
 ; R600-NEXT:     SETGT_INT T5.Y, T1.Y, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, T0.Y, T4.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T2.W, T2.W, PV.X, T1.Z,
+; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.Y, T1.Z,
+; R600-NEXT:     CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212
 ; R600-NEXT:     LSHL * T3.W, T4.Y, literal.y,
 ; R600-NEXT:    -127(nan), 23(3.222986e-44)
 ; R600-NEXT:     ADD_INT T6.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T0.Y, T0.W, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT T0.Y, T0.W, PV.Z, PV.W,
 ; R600-NEXT:     CNDE_INT T0.Z, PV.Y, PV.X, T1.Y,
 ; R600-NEXT:     CNDE_INT T0.W, T5.X, T7.X, T4.X,
 ; R600-NEXT:     SETGT_INT * T2.W, T1.Y, literal.y,
@@ -1367,18 +1362,18 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; R600-NEXT:     CNDE_INT T4.X, PS, PV.Z, PV.W,
 ; R600-NEXT:     MUL_IEEE T0.Y, PV.Y, PV.X,
 ; R600-NEXT:     SETGT T0.Z, literal.x, KC0[3].Z,
-; R600-NEXT:     CNDE_INT T0.W, T5.W, T2.Y, T1.W,
-; R600-NEXT:     MUL_IEEE * T1.W, T3.X, literal.y,
+; R600-NEXT:     MUL_IEEE T0.W, T2.Y, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T4.W, T3.X, T1.W,
 ; R600-NEXT:    -1036817932(-4.485347e+01), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T3.X, T5.X, T3.X, PS,
-; R600-NEXT:     CNDE_INT T1.Y, T5.Y, PV.W, T1.X,
+; R600-NEXT:     CNDE_INT T1.X, T5.Y, PS, T1.X,
+; R600-NEXT:     CNDE_INT T1.Y, T5.X, T2.Y, PV.W,
 ; R600-NEXT:     CNDE T0.Z, PV.Z, PV.Y, 0.0,
 ; R600-NEXT:     SETGT T0.W, KC0[3].Z, literal.x,
 ; R600-NEXT:     LSHL * T1.W, PV.X, literal.y,
 ; R600-NEXT:    1109008539(3.853184e+01), 23(3.222986e-44)
-; R600-NEXT:     ADD_INT T1.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T3.X, PS, literal.x,
 ; R600-NEXT:     CNDE T0.Y, PV.W, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.Y, PV.X,
+; R600-NEXT:     CNDE_INT T0.Z, T2.W, PV.X, PV.Y,
 ; R600-NEXT:     CNDE T0.W, T2.X, T0.X, 0.0,
 ; R600-NEXT:     SETGT * T1.W, KC0[3].Y, literal.z,
 ; R600-NEXT:    1065353216(1.000000e+00), 2139095040(INF)
@@ -1399,197 +1394,193 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ;
 ; CM-LABEL: s_exp10_v3f32:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 102, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 80, @109, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T3.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X
+; CM-NEXT:    ALU 101, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 77, @108, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    ALU clause starting at 6:
 ; CM-NEXT:     AND_INT * T0.W, KC0[3].Y, literal.x,
 ; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ADD * T1.W, KC0[3].Y, -PV.W,
-; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     RNDNE * T2.W, PV.Z,
-; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT:     TRUNC T2.Z, PV.W,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.W, literal.y,
+; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT:     RNDNE T1.Z, PV.W,
 ; CM-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
 ; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T0.Y, T0.W, literal.x, PV.W,
-; CM-NEXT:     ADD T0.Z, T0.Z, -T2.W,
-; CM-NEXT:     FLT_TO_INT * T0.W, PV.Z,
+; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
 ; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
+; CM-NEXT:     TRUNC T1.Z, T1.Z,
+; CM-NEXT:     ADD * T0.W, PV.W, PV.Z,
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     FLT_TO_INT T0.Z, T1.Z,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MUL_IEEE T0.Y, PV.X, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T1.Z, literal.y,
-; CM-NEXT:     MAX_INT * T1.W, T0.W, literal.z,
-; CM-NEXT:    2130706432(1.701412e+38), -254(nan)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T1.X, T0.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T0.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.W, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
+; CM-NEXT:     ADD_INT T1.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T1.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T2.X, T0.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.z,
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.Z, PV.Y, T0.W,
-; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T0.W, T0.W, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), 127(1.779649e-43)
+; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T3.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
 ; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T1.W, PV.X, T2.W,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T1.X, T2.X, T0.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Y, T1.Z, PV.Z, T0.X,
+; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T0.Y, T2.Y, T3.X, PV.Z,
 ; CM-NEXT:     LSHL T0.Z, PV.Y, literal.x,
-; CM-NEXT:     AND_INT * T1.W, KC0[3].Z, literal.y,
+; CM-NEXT:     AND_INT * T0.W, KC0[3].Z, literal.y,
 ; CM-NEXT:    23(3.222986e-44), -4096(nan)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, literal.x,
 ; CM-NEXT:     ADD T1.Y, KC0[3].Z, -PV.W,
-; CM-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T0.W, PV.Y, PV.X,
-; CM-NEXT:    1079283712(3.321289e+00), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.Y, PV.W, PV.Z,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT:     RNDNE * T0.W, PV.X,
-; CM-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T0.Z, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT * T1.W, T3.W, PV.X, PV.Y,
+; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.W, literal.y,
+; CM-NEXT:     AND_INT * T1.W, KC0[3].W, literal.z,
+; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT:    -4096(nan), 0(0.000000e+00)
 ; CM-NEXT:     SETGT T1.X, literal.x, KC0[3].Y,
-; CM-NEXT:     TRUNC T2.Y, PV.W,
-; CM-NEXT:     AND_INT T1.Z, KC0[3].W, literal.y,
-; CM-NEXT:     MULADD_IEEE * T2.W, T1.Y, literal.z, PV.Z,
-; CM-NEXT:    -1036817932(-4.485347e+01), -4096(nan)
-; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; CM-NEXT:     MULADD_IEEE T2.X, T1.W, literal.x, PV.W,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.Z, literal.y,
-; CM-NEXT:     FLT_TO_INT T0.Z, PV.Y,
-; CM-NEXT:     ADD * T1.W, KC0[3].W, -PV.Z,
+; CM-NEXT:     ADD T2.Y, KC0[3].W, -PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y,
+; CM-NEXT:    -1036817932(-4.485347e+01), 1079283712(3.321289e+00)
+; CM-NEXT:     MULADD_IEEE T2.X, T0.W, literal.x, PV.W,
+; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212
 ; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
-; CM-NEXT:     ADD T0.X, T0.X, -T0.W,
-; CM-NEXT:     MUL_IEEE T2.Y, PV.W, literal.x,
-; CM-NEXT:     MAX_INT T2.Z, PV.Z, literal.y,
-; CM-NEXT:     RNDNE * T0.W, PV.Y,
-; CM-NEXT:    975668412(6.390323e-04), -330(nan)
-; CM-NEXT:     TRUNC T3.X, PV.W,
-; CM-NEXT:     ADD_INT T3.Y, PV.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T2.Z, T1.W, literal.y, PV.Y,
-; CM-NEXT:     ADD * T1.W, PV.X, T2.X,
-; CM-NEXT:    204(2.858649e-43), 1079283712(3.321289e+00)
-; CM-NEXT:     EXP_IEEE T0.X, T1.W,
-; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
-; CM-NEXT:     MULADD_IEEE T2.Y, T1.Z, literal.y, T2.Z, BS:VEC_102/SCL_221
-; CM-NEXT:     ADD T1.Z, T1.Y, -T0.W,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.z,
-; CM-NEXT:    102(1.429324e-43), 975668412(6.390323e-04)
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T4.X, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.y,
-; CM-NEXT:     SETGT_UINT T2.Z, T0.Z, literal.z,
-; CM-NEXT:     ADD * T1.W, PV.Z, PV.Y,
-; CM-NEXT:    -229(nan), 2130706432(1.701412e+38)
-; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     TRUNC T3.X, T1.Z,
+; CM-NEXT:     RNDNE T1.Y, PV.W,
+; CM-NEXT:     MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z,
+; CM-NEXT:     ADD * T2.W, PV.Y, PV.X,
+; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE T0.Y, T2.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T2.W,
+; CM-NEXT:     MULADD_IEEE T2.X, T1.W, literal.x, T0.Z,
+; CM-NEXT:     ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T0.Z, T3.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.y,
+; CM-NEXT:    975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, PV.Z, literal.y,
+; CM-NEXT:     TRUNC T1.Z, T1.Y,
+; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
 ; CM-NEXT:     EXP_IEEE T1.X (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T1.Y (MASKED), T1.W,
-; CM-NEXT:     EXP_IEEE T1.Z, T1.W,
+; CM-NEXT:     EXP_IEEE T1.Y, T1.W,
+; CM-NEXT:     EXP_IEEE T1.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T1.W,
-; CM-NEXT:    ALU clause starting at 109:
-; CM-NEXT:     CNDE_INT T5.X, T2.Z, T0.W, T1.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T4.X, T3.Y, T2.X,
-; CM-NEXT:     FLT_TO_INT T3.Z, T3.X, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE * T0.W, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T2.X, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.X, literal.y,
-; CM-NEXT:     MUL_IEEE T4.Z, PV.W, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, PV.Z, literal.w,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T3.X, PV.W, T0.W, PV.Z,
-; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     CNDE_INT T4.Z, PV.X, T1.Y, T0.Z,
-; CM-NEXT:     MAX_INT * T0.W, T3.Z, literal.y,
-; CM-NEXT:    209715200(1.972152e-31), -330(nan)
-; CM-NEXT:     ADD_INT T6.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, T3.Z, literal.y,
-; CM-NEXT:     SETGT_UINT T5.Z, T3.Z, literal.z,
-; CM-NEXT:     MUL_IEEE * T0.W, T1.Z, literal.w, BS:VEC_120/SCL_212
+; CM-NEXT:     FLT_TO_INT T2.X, T1.Z,
+; CM-NEXT:     MUL_IEEE T2.Y, PV.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Z, T3.Y, T3.X, T0.W,
+; CM-NEXT:     SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    209715200(1.972152e-31), -127(nan)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE * T4.Y, PV.Y, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 108:
+; CM-NEXT:     SETGT_UINT T1.Z, T2.X, literal.x,
+; CM-NEXT:     MAX_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT:    -229(nan), -330(nan)
+; CM-NEXT:     ADD_INT T4.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T5.Y, T0.Z, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y,
+; CM-NEXT:     SETGT_INT * T1.W, T2.X, literal.z,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T7.X, PV.W, literal.x,
-; CM-NEXT:     MIN_INT T4.Y, T3.Z, literal.y,
-; CM-NEXT:     CNDE_INT T6.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     SETGT_INT * T2.W, T3.Z, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 381(5.338947e-43)
 ; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, T3.Z,
-; CM-NEXT:     MIN_INT T1.Y, T0.Z, literal.x,
-; CM-NEXT:     ADD_INT T6.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T3.W, T3.Z, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    381(5.338947e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T8.X, T1.W, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T4.Y, T3.Z, literal.x,
-; CM-NEXT:     ADD_INT T3.Z, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT * T1.W, T0.Z, literal.z, BS:VEC_120/SCL_212
+; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T1.Y,
+; CM-NEXT:     MUL_IEEE T0.Y, T0.Y, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212
+; CM-NEXT:    2130706432(1.701412e+38), -330(nan)
+; CM-NEXT:     CNDE_INT T4.X, T0.W, PV.W, T0.Z,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT:     ADD_INT T2.Z, T2.X, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, T2.X, literal.z,
+; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T6.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, T2.X, literal.y,
+; CM-NEXT:     SETGT_UINT T3.Z, T2.X, literal.z,
+; CM-NEXT:     CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z,
+; CM-NEXT:    -254(nan), -127(nan)
+; CM-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT:     CNDE_INT T1.Y, T1.W, PV.W, T2.X,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.Y, PV.X,
+; CM-NEXT:     MIN_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T2.X, T2.X, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T9.X, T2.Z, PV.W, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T5.Z, T7.X, T0.W, BS:VEC_201
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T6.X, T2.W, PV.W, T1.Z,
-; CM-NEXT:     LSHL T5.Y, PV.Z, literal.x,
-; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T4.Z, PV.X,
-; CM-NEXT:     CNDE_INT * T0.W, T4.X, T3.Y, T2.Y,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T2.X, PV.W, T0.X,
-; CM-NEXT:     LSHL T2.Y, PV.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     SETGT_INT T2.Y, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.X, T1.Y, T1.Z,
+; CM-NEXT:     MUL_IEEE * T1.W, T7.X, literal.y,
+; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T7.X, T3.Z, T7.X, PV.W,
+; CM-NEXT:     LSHL T1.Y, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122
+; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.y,
+; CM-NEXT:    23(3.222986e-44), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T4.X, T0.W, T0.Y, PV.W,
+; CM-NEXT:     LSHL T0.Y, PV.Z, literal.x,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T4.Y, PV.X, T3.X, BS:VEC_021/SCL_122
+; CM-NEXT:     CNDE_INT * T0.W, T2.X, T5.X, PV.X,
 ; CM-NEXT:    23(3.222986e-44), 1065353216(1.000000e+00)
 ; CM-NEXT:     MUL_IEEE T2.X, PV.W, PV.Z,
-; CM-NEXT:     SETGT T3.Y, literal.x, KC0[3].W,
+; CM-NEXT:     SETGT T1.Y, literal.x, KC0[3].W,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.Y, PV.X, T5.X,
+; CM-NEXT:     CNDE_INT * T0.W, T2.Y, T3.X, PV.X,
 ; CM-NEXT:    -1036817932(-4.485347e+01), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT:     SETGT T1.Y, literal.x, KC0[3].Z,
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, PV.Z,
+; CM-NEXT:     SETGT T0.Y, literal.x, KC0[3].Z,
 ; CM-NEXT:     CNDE T0.Z, PV.Y, PV.X, 0.0,
 ; CM-NEXT:     SETGT * T0.W, KC0[3].W, literal.y,
 ; CM-NEXT:    -1036817932(-4.485347e+01), 1109008539(3.853184e+01)
 ; CM-NEXT:     CNDE T2.X, PV.W, PV.Z, literal.x,
-; CM-NEXT:     CNDE T1.Y, PV.Y, PV.X, 0.0,
+; CM-NEXT:     CNDE T0.Y, PV.Y, PV.X, 0.0,
 ; CM-NEXT:     SETGT T0.Z, KC0[3].Z, literal.y,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
 ; CM-NEXT:    2139095040(INF), 1109008539(3.853184e+01)
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T0.X, PV.W, literal.x,
-; CM-NEXT:     CNDE T1.Y, PV.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE T0.Z, T1.X, T0.Y, 0.0,
+; CM-NEXT:     LSHR T3.X, PV.W, literal.x,
+; CM-NEXT:     CNDE T0.Y, PV.Z, PV.Y, literal.y,
+; CM-NEXT:     CNDE T0.Z, T1.X, T0.X, 0.0,
 ; CM-NEXT:     SETGT * T0.W, KC0[3].Y, literal.z,
 ; CM-NEXT:    2(2.802597e-45), 2139095040(INF)
 ; CM-NEXT:    1109008539(3.853184e+01), 0(0.000000e+00)
-; CM-NEXT:     CNDE * T1.X, PV.W, PV.Z, literal.x,
+; CM-NEXT:     CNDE * T0.X, PV.W, PV.Z, literal.x,
 ; CM-NEXT:    2139095040(INF), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %result = call <3 x float> @llvm.exp10.v3f32(<3 x float> %in)
   store <3 x float> %result, ptr addrspace(1) %out
@@ -2052,227 +2043,224 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; R600-LABEL: s_exp10_v4f32:
 ; R600:       ; %bb.0:
 ; R600-NEXT:    ALU 98, @6, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 98, @105, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    ALU 24, @204, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 95, @105, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    ALU 24, @201, KC0[CB0:0-32], KC1[]
 ; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
 ; R600-NEXT:    CF_END
 ; R600-NEXT:    PAD
 ; R600-NEXT:    ALU clause starting at 6:
 ; R600-NEXT:     AND_INT * T0.W, KC0[3].Z, literal.x,
 ; R600-NEXT:    -4096(nan), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.W, KC0[3].Z, -PV.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.W, literal.x,
+; R600-NEXT:     ADD * T1.W, KC0[3].Z, -PV.W,
+; R600-NEXT:     MUL_IEEE T2.W, PV.W, literal.x,
+; R600-NEXT:     MUL_IEEE * T3.W, T0.W, literal.y,
+; R600-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; R600-NEXT:     RNDNE T4.W, PS,
+; R600-NEXT:     MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122
 ; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T3.W, PS,
-; R600-NEXT:     MUL_IEEE * T4.W, PV.W, literal.x,
+; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PS,
+; R600-NEXT:     ADD * T1.W, T3.W, -PV.W,
 ; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T1.W, T1.W, literal.x, PS,
-; R600-NEXT:     TRUNC * T4.W, PV.W,
-; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     FLT_TO_INT T0.Z, PS,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
-; R600-NEXT:     ADD * T1.W, T2.W, -T3.W,
-; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     ADD T1.Z, PS, PV.W,
-; R600-NEXT:     MAX_INT T0.W, PV.Z, literal.x,
-; R600-NEXT:     MIN_INT * T1.W, PV.Z, literal.y,
-; R600-NEXT:    -330(nan), 381(5.338947e-43)
-; R600-NEXT:     ADD_INT T0.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T0.Y, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT T0.W, T0.Z, literal.w,
-; R600-NEXT:     EXP_IEEE * T1.X, PV.Z,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T2.X, T0.Z, literal.x,
-; R600-NEXT:     SETGT_UINT T1.Y, T0.Z, literal.y,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T1.W, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE * T2.W, PS, literal.z,
-; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T3.X, T1.X, literal.x,
-; R600-NEXT:     MUL_IEEE T0.Y, PS, literal.y,
-; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
-; R600-NEXT:     CNDE_INT T3.W, PV.Y, PV.X, T0.X,
-; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
-; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     AND_INT T2.Y, KC0[4].X, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT T0.W, T0.W, PV.Y, T2.W,
-; R600-NEXT:     MUL_IEEE * T2.W, PV.X, literal.y,
-; R600-NEXT:    -4096(nan), 2130706432(1.701412e+38)
-; R600-NEXT:     CNDE_INT T0.X, T1.Y, T3.X, PS,
-; R600-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T1.X,
-; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
-; R600-NEXT:     ADD T0.W, KC0[4].X, -PV.Y,
-; R600-NEXT:     MUL_IEEE * T1.W, PV.Y, literal.y,
-; R600-NEXT:    23(3.222986e-44), 1079283712(3.321289e+00)
-; R600-NEXT:     RNDNE T1.Y, PS,
-; R600-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T2.W, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT * T3.W, T4.W, PV.Y, PV.X,
-; R600-NEXT:    975668412(6.390323e-04), 1065353216(1.000000e+00)
-; R600-NEXT:     MUL_IEEE T0.Y, PS, PV.W,
-; R600-NEXT:     AND_INT T0.Z, KC0[3].W, literal.x,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.W, literal.y, PV.Z,
-; R600-NEXT:     TRUNC * T2.W, PV.Y,
-; R600-NEXT:    -4096(nan), 1079283712(3.321289e+00)
-; R600-NEXT:     SETGT T0.X, literal.x, KC0[3].Z,
-; R600-NEXT:     FLT_TO_INT T3.Y, PS,
-; R600-NEXT:     MULADD_IEEE T1.Z, T2.Y, literal.y, PV.W,
-; R600-NEXT:     ADD T0.W, T1.W, -T1.Y,
-; R600-NEXT:     MUL_IEEE * T1.W, PV.Z, literal.z,
-; R600-NEXT:    -1036817932(-4.485347e+01), 975668412(6.390323e-04)
-; R600-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
-; R600-NEXT:     RNDNE T1.X, PS,
-; R600-NEXT:     AND_INT T1.Y, KC0[3].Y, literal.x,
-; R600-NEXT:     ADD T1.Z, PV.W, PV.Z,
-; R600-NEXT:     MAX_INT T0.W, PV.Y, literal.y,
-; R600-NEXT:     MIN_INT * T2.W, PV.Y, literal.z,
-; R600-NEXT:    -4096(nan), -330(nan)
+; R600-NEXT:     ADD T0.W, PS, PV.W,
+; R600-NEXT:     TRUNC * T1.W, T4.W,
+; R600-NEXT:     FLT_TO_INT T1.W, PS,
+; R600-NEXT:     EXP_IEEE * T0.X, PV.W,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     MAX_INT T0.W, PV.W, literal.y,
+; R600-NEXT:     MIN_INT * T2.W, PV.W, literal.z,
+; R600-NEXT:    209715200(1.972152e-31), -330(nan)
 ; R600-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T2.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T2.Y, PV.W, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T3.Y, literal.z,
-; R600-NEXT:     SETGT_UINT T0.W, T3.Y, literal.w,
-; R600-NEXT:     EXP_IEEE * T1.Z, PV.Z,
-; R600-NEXT:    -254(nan), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -229(nan)
-; R600-NEXT:     ADD_INT T3.X, T3.Y, literal.x,
-; R600-NEXT:     SETGT_UINT T4.Y, T3.Y, literal.y,
-; R600-NEXT:     CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT T2.W, T3.Y, literal.x,
-; R600-NEXT:     MUL_IEEE * T3.W, PS, literal.z,
+; R600-NEXT:     ADD_INT T1.X, PS, literal.x,
+; R600-NEXT:     AND_INT T0.Y, KC0[4].X, literal.y,
+; R600-NEXT:     ADD_INT T1.Z, PV.W, literal.z,
+; R600-NEXT:     ADD_INT * T0.W, T1.W, literal.w,
+; R600-NEXT:    -254(nan), -4096(nan)
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:     SETGT_UINT * T2.W, T1.W, literal.x,
+; R600-NEXT:    -229(nan), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT T2.X, T1.W, literal.x,
+; R600-NEXT:     SETGT_UINT T1.Y, T1.W, literal.y,
+; R600-NEXT:     CNDE_INT T1.Z, PV.W, T1.Z, T0.W,
+; R600-NEXT:     SETGT_INT T0.W, T1.W, literal.x,
+; R600-NEXT:     ADD * T3.W, KC0[4].X, -T0.Y,
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T4.X, T1.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
-; R600-NEXT:     CNDE_INT T2.Z, PV.W, PV.Z, T3.Y,
-; R600-NEXT:     CNDE_INT T4.W, PV.Y, PV.X, T2.X,
-; R600-NEXT:     SETGT_INT * T5.W, T3.Y, literal.z,
-; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, T0.Y, literal.y,
+; R600-NEXT:     CNDE_INT T1.Z, PV.W, PV.Z, T1.W,
+; R600-NEXT:     CNDE_INT T4.W, PV.Y, PV.X, T1.X,
+; R600-NEXT:     SETGT_INT * T1.W, T1.W, literal.z,
+; R600-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
 ; R600-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; R600-NEXT:     ADD T2.X, KC0[3].W, -T0.Z,
-; R600-NEXT:     CNDE_INT T3.Y, PS, PV.Z, PV.W,
-; R600-NEXT:     CNDE_INT * T2.Z, T0.W, PV.Y, T3.W,
-; R600-NEXT:    ALU clause starting at 105:
-; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.x,
-; R600-NEXT:     ADD * T3.W, KC0[3].Y, -T1.Y,
+; R600-NEXT:     CNDE_INT T1.X, PS, PV.Z, PV.W,
+; R600-NEXT:     RNDNE T3.Y, PV.Y,
+; R600-NEXT:     MULADD_IEEE T1.Z, T3.W, literal.x, PV.X,
+; R600-NEXT:     MUL_IEEE T3.W, T0.Z, literal.y,
+; R600-NEXT:     MUL_IEEE * T4.W, T0.X, literal.z,
+; R600-NEXT:    1079283712(3.321289e+00), 209715200(1.972152e-31)
 ; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T2.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T4.Y, T2.W, PV.W, T0.Z,
+; R600-NEXT:     MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z,
+; R600-NEXT:     ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212
+; R600-NEXT:     AND_INT * T3.W, KC0[3].Y, literal.z,
+; R600-NEXT:    2130706432(1.701412e+38), 975668412(6.390323e-04)
+; R600-NEXT:    -4096(nan), 0(0.000000e+00)
 ; R600-NEXT:     MUL_IEEE T3.X, PS, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, T1.Y, literal.y,
-; R600-NEXT:     CNDE_INT T3.Z, T4.Y, T4.X, PV.W, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T0.W, T2.W, T2.Z, T1.Z,
-; R600-NEXT:     LSHL * T2.W, T3.Y, literal.z,
-; R600-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
-; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT T4.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T3.Y, T5.W, PV.W, PV.Z,
-; R600-NEXT:     RNDNE T1.Z, PV.Y,
-; R600-NEXT:     MULADD_IEEE T0.W, T3.W, literal.y, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT:     MUL_IEEE * T2.W, T2.X, literal.z,
+; R600-NEXT:     ADD T0.Y, PV.W, PV.Z,
+; R600-NEXT:     CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.W, T1.Y, T4.W, PV.X,
+; R600-NEXT:     LSHL * T2.W, T1.X, literal.y,
+; R600-NEXT:    1079283712(3.321289e+00), 23(3.222986e-44)
+; R600-NEXT:     AND_INT T0.X, KC0[3].W, literal.x,
+; R600-NEXT:     TRUNC T1.Y, T3.Y,
+; R600-NEXT:     ADD_INT T1.Z, PS, literal.y,
+; R600-NEXT:     CNDE_INT T0.W, T1.W, PV.Z, PV.W,
+; R600-NEXT:     EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT:    -4096(nan), 1065353216(1.000000e+00)
+; R600-NEXT:     MUL_IEEE T1.X, PV.W, PV.Z,
+; R600-NEXT:     FLT_TO_INT T1.Y, PV.Y,
+; R600-NEXT:     MUL_IEEE T0.Z, PS, literal.x,
+; R600-NEXT:     ADD T0.W, KC0[3].W, -PV.X,
+; R600-NEXT:     RNDNE * T1.W, T3.X,
+; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; R600-NEXT:     SETGT T2.X, literal.x, KC0[3].Z,
+; R600-NEXT:     TRUNC T2.Y, PS,
+; R600-NEXT:     MUL_IEEE T1.Z, PV.W, literal.y,
+; R600-NEXT:     MUL_IEEE T2.W, PV.Z, literal.z,
+; R600-NEXT:     MAX_INT * T4.W, PV.Y, literal.w,
+; R600-NEXT:    -1036817932(-4.485347e+01), 975668412(6.390323e-04)
+; R600-NEXT:    209715200(1.972152e-31), -330(nan)
+; R600-NEXT:     ADD T4.X, KC0[3].Y, -T3.W,
+; R600-NEXT:     ADD_INT T3.Y, PS, literal.x,
+; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T4.W, T1.Y, literal.z,
+; R600-NEXT:     MIN_INT * T5.W, T1.Y, literal.w,
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:    -229(nan), 381(5.338947e-43)
+; R600-NEXT:     ADD_INT T5.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T4.Y, T1.Y, literal.y,
+; R600-NEXT:     SETGT_UINT T3.Z, T1.Y, literal.z,
+; R600-NEXT:     CNDE_INT T5.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT:     SETGT_INT * T6.W, T1.Y, literal.y,
+; R600-NEXT:    -254(nan), -127(nan)
+; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T6.X, T0.Y, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, PS, PV.W, T1.Y,
+; R600-NEXT:     CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X,
+; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:    ALU clause starting at 105:
+; R600-NEXT:     SETGT_INT T5.W, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE * T7.W, T4.X, literal.y,
+; R600-NEXT:    127(1.779649e-43), 975668412(6.390323e-04)
+; R600-NEXT:     MUL_IEEE T5.X, T0.X, literal.x,
+; R600-NEXT:     MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T2.Z, PV.W, T3.Y, T2.Z,
+; R600-NEXT:     MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201
+; R600-NEXT:     CNDE_INT * T2.W, T4.W, T2.W, T0.Z,
+; R600-NEXT:    1079283712(3.321289e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T4.X, T6.W, PS, T0.Y,
+; R600-NEXT:     CNDE_INT T0.Y, T3.Z, T6.X, PV.W,
+; R600-NEXT:     LSHL T0.Z, PV.Z, literal.x,
+; R600-NEXT:     MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201
+; R600-NEXT:     ADD * T1.W, T3.X, -T1.W,
+; R600-NEXT:    23(3.222986e-44), 975668412(6.390323e-04)
+; R600-NEXT:     ADD T3.X, PS, PV.W,
+; R600-NEXT:     ADD_INT T1.Y, PV.Z, literal.x,
+; R600-NEXT:     CNDE_INT T0.Z, T5.W, PV.X, PV.Y,
+; R600-NEXT:     RNDNE T1.W, T5.X,
+; R600-NEXT:     MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122
 ; R600-NEXT:    1065353216(1.000000e+00), 1079283712(3.321289e+00)
-; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     MULADD_IEEE T2.X, T2.X, literal.x, PS,
-; R600-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.y, PV.W,
-; R600-NEXT:     ADD T2.Z, T2.Y, -PV.Z, BS:VEC_120/SCL_212
-; R600-NEXT:     MUL_IEEE T0.W, PV.Y, PV.X,
-; R600-NEXT:     SETGT * T2.W, literal.z, KC0[4].X,
-; R600-NEXT:    1079283712(3.321289e+00), 975668412(6.390323e-04)
-; R600-NEXT:    -1036817932(-4.485347e+01), 0(0.000000e+00)
-; R600-NEXT:     CNDE T3.X, PS, PV.W, 0.0,
-; R600-NEXT:     ADD T1.Y, PV.Z, PV.Y,
-; R600-NEXT:     TRUNC T1.Z, T1.Z,
-; R600-NEXT:     MULADD_IEEE T0.W, T0.Z, literal.x, PV.X, BS:VEC_120/SCL_212
-; R600-NEXT:     ADD * T1.W, T1.W, -T1.X,
-; R600-NEXT:    975668412(6.390323e-04), 0(0.000000e+00)
-; R600-NEXT:     SETGT T2.X, KC0[4].X, literal.x,
-; R600-NEXT:     ADD T2.Y, PS, PV.W,
-; R600-NEXT:     FLT_TO_INT T0.Z, PV.Z,
-; R600-NEXT:     TRUNC T0.W, T1.X,
-; R600-NEXT:     EXP_IEEE * T1.X, PV.Y,
-; R600-NEXT:    1109008539(3.853184e+01), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T4.X, PS, literal.x,
-; R600-NEXT:     FLT_TO_INT T1.Y, PV.W,
-; R600-NEXT:     MAX_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     MUL_IEEE T0.W, PS, literal.z,
-; R600-NEXT:     EXP_IEEE * T1.W, PV.Y,
-; R600-NEXT:    2130706432(1.701412e+38), -330(nan)
+; R600-NEXT:     MULADD_IEEE T0.X, T0.X, literal.x, PS,
+; R600-NEXT:     ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212
+; R600-NEXT:     MUL_IEEE T0.Z, PV.Z, PV.Y,
+; R600-NEXT:     SETGT T0.W, literal.y, KC0[4].X,
+; R600-NEXT:     EXP_IEEE * T1.Y, PV.X,
+; R600-NEXT:    975668412(6.390323e-04), -1036817932(-4.485347e+01)
+; R600-NEXT:     CNDE T3.X, PV.W, PV.Z, 0.0,
+; R600-NEXT:     ADD T0.Y, PV.Y, PV.X,
+; R600-NEXT:     FLT_TO_INT T0.Z, T2.Y,
+; R600-NEXT:     TRUNC T0.W, T1.W,
+; R600-NEXT:     MUL_IEEE * T1.W, PS, literal.x,
 ; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T5.X, PV.W, literal.x,
-; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T2.W, T0.Z, literal.z,
-; R600-NEXT:     MAX_INT * T3.W, PV.Y, literal.w,
-; R600-NEXT:    209715200(1.972152e-31), 204(2.858649e-43)
-; R600-NEXT:    102(1.429324e-43), -330(nan)
-; R600-NEXT:     SETGT_UINT T6.X, T0.Z, literal.x,
-; R600-NEXT:     ADD_INT T3.Y, PS, literal.y,
-; R600-NEXT:     ADD_INT T2.Z, T1.Y, literal.z,
-; R600-NEXT:     SETGT_UINT T3.W, T1.Y, literal.x,
-; R600-NEXT:     MIN_INT * T4.W, T1.Y, literal.w,
+; R600-NEXT:     SETGT T0.X, KC0[4].X, literal.x,
+; R600-NEXT:     MUL_IEEE T2.Y, PS, literal.y,
+; R600-NEXT:     FLT_TO_INT T1.Z, PV.W,
+; R600-NEXT:     MAX_INT T0.W, PV.Z, literal.z,
+; R600-NEXT:     EXP_IEEE * T0.Y, PV.Y,
+; R600-NEXT:    1109008539(3.853184e+01), 209715200(1.972152e-31)
+; R600-NEXT:    -330(nan), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T4.X, T1.Y, literal.x,
+; R600-NEXT:     MUL_IEEE T3.Y, PS, literal.y,
+; R600-NEXT:     ADD_INT T2.Z, PV.W, literal.z,
+; R600-NEXT:     ADD_INT * T0.W, T0.Z, literal.w,
+; R600-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; R600-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; R600-NEXT:     MAX_INT * T2.W, T1.Z, literal.x,
+; R600-NEXT:    -330(nan), 0(0.000000e+00)
+; R600-NEXT:     SETGT_UINT T5.X, T0.Z, literal.x,
+; R600-NEXT:     ADD_INT T4.Y, PV.W, literal.y,
+; R600-NEXT:     ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212
+; R600-NEXT:     SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     MIN_INT * T3.W, T1.Z, literal.w,
 ; R600-NEXT:    -229(nan), 204(2.858649e-43)
 ; R600-NEXT:    102(1.429324e-43), 381(5.338947e-43)
-; R600-NEXT:     ADD_INT T7.X, PS, literal.x,
-; R600-NEXT:     ADD_INT T4.Y, T1.Y, literal.y,
-; R600-NEXT:     SETGT_UINT T3.Z, T1.Y, literal.z,
-; R600-NEXT:     CNDE_INT T4.W, PV.W, PV.Y, PV.Z,
-; R600-NEXT:     SETGT_INT * T5.W, T1.Y, literal.y,
+; R600-NEXT:     ADD_INT T6.X, PS, literal.x,
+; R600-NEXT:     ADD_INT T5.Y, T1.Z, literal.y,
+; R600-NEXT:     SETGT_UINT T4.Z, T1.Z, literal.z,
+; R600-NEXT:     CNDE_INT T3.W, PV.W, PV.Y, PV.Z,
+; R600-NEXT:     SETGT_INT * T4.W, T1.Z, literal.y,
 ; R600-NEXT:    -254(nan), -127(nan)
 ; R600-NEXT:    254(3.559298e-43), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T8.X, PS, PV.W, T1.Y,
-; R600-NEXT:     CNDE_INT T3.Y, PV.Z, PV.Y, PV.X,
-; R600-NEXT:     SETGT_INT T2.Z, T1.Y, literal.x,
-; R600-NEXT:     CNDE_INT T2.W, T6.X, T1.Z, T2.W,
-; R600-NEXT:     SETGT_INT * T4.W, T0.Z, literal.y,
+; R600-NEXT:     CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T4.Y, PV.Z, PV.Y, PV.X,
+; R600-NEXT:     SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212
+; R600-NEXT:     CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221
+; R600-NEXT:     SETGT_INT * T3.W, T0.Z, literal.y,
 ; R600-NEXT:    127(1.779649e-43), -127(nan)
-; R600-NEXT:     CNDE_INT T7.X, PS, PV.W, T0.Z,
-; R600-NEXT:     CNDE_INT T1.Y, PV.Z, PV.X, PV.Y,
-; R600-NEXT:     MIN_INT T1.Z, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T2.W, T1.W, literal.y,
-; R600-NEXT:     MUL_IEEE * T6.W, T2.Y, literal.z,
-; R600-NEXT:    381(5.338947e-43), 2130706432(1.701412e+38)
-; R600-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T8.X, T3.W, PS, T2.Y,
-; R600-NEXT:     MUL_IEEE T2.Y, PV.W, literal.x,
-; R600-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
-; R600-NEXT:     ADD_INT T3.W, T0.Z, literal.z,
-; R600-NEXT:     SETGT_UINT * T6.W, T0.Z, literal.w,
+; R600-NEXT:     CNDE_INT T6.X, PS, PV.W, T0.Z,
+; R600-NEXT:     CNDE_INT T4.Y, PV.Z, PV.X, PV.Y,
+; R600-NEXT:     MIN_INT T2.Z, T0.Z, literal.x,
+; R600-NEXT:     MUL_IEEE T0.W, T3.Y, literal.y,
+; R600-NEXT:     MUL_IEEE * T5.W, T0.Y, literal.z,
+; R600-NEXT:    381(5.338947e-43), 209715200(1.972152e-31)
+; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; R600-NEXT:     MUL_IEEE T7.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T3.Y, T2.W, PV.W, T3.Y,
+; R600-NEXT:     ADD_INT T2.Z, PV.Z, literal.y,
+; R600-NEXT:     ADD_INT T0.W, T0.Z, literal.z,
+; R600-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; R600-NEXT:    2130706432(1.701412e+38), -254(nan)
 ; R600-NEXT:    -127(nan), 254(3.559298e-43)
-; R600-NEXT:     CNDE_INT T9.X, PS, PV.W, PV.Z,
-; R600-NEXT:     SETGT_INT T3.Y, T0.Z, literal.x,
-; R600-NEXT:     CNDE_INT T0.Z, T3.Z, T2.W, PV.Y, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE_INT T1.W, T5.W, PV.X, T1.W, BS:VEC_021/SCL_122
-; R600-NEXT:     LSHL * T2.W, T1.Y, literal.y,
+; R600-NEXT:     CNDE_INT T8.X, PS, PV.W, PV.Z,
+; R600-NEXT:     SETGT_INT T5.Y, T0.Z, literal.x,
+; R600-NEXT:     CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122
+; R600-NEXT:     CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212
+; R600-NEXT:     LSHL * T4.W, T4.Y, literal.y,
 ; R600-NEXT:    127(1.779649e-43), 23(3.222986e-44)
-; R600-NEXT:     ADD_INT T8.X, PS, literal.x,
-; R600-NEXT:     CNDE_INT T1.Y, T2.Z, PV.W, PV.Z,
-; R600-NEXT:     CNDE_INT T0.Z, PV.Y, T7.X, PV.X,
-; R600-NEXT:     CNDE_INT * T0.W, T6.X, T5.X, T0.W, BS:VEC_021/SCL_122
-; R600-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE * T1.W, T4.X, literal.x,
-; R600-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; R600-NEXT:     CNDE_INT T4.X, T6.W, T4.X, PV.W,
-; R600-NEXT:     CNDE_INT * T2.Y, T4.W, T0.W, T1.X, BS:VEC_120/SCL_212
-; R600-NEXT:    ALU clause starting at 204:
+; R600-NEXT:     ADD_INT T7.X, PS, literal.x,
+; R600-NEXT:     CNDE_INT T0.Y, T1.Z, PV.Z, PV.W,
+; R600-NEXT:     CNDE_INT T0.Z, PV.Y, T6.X, PV.X,
+; R600-NEXT:     MUL_IEEE T0.W, T4.X, literal.y,
+; R600-NEXT:     CNDE_INT * T1.W, T5.X, T2.Y, T1.W,
+; R600-NEXT:    1065353216(1.000000e+00), 2130706432(1.701412e+38)
+; R600-NEXT:     CNDE_INT T5.X, T3.W, PS, T1.Y,
+; R600-NEXT:     CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212
+; R600-NEXT:    ALU clause starting at 201:
 ; R600-NEXT:     LSHL T0.Z, T0.Z, literal.x,
-; R600-NEXT:     MUL_IEEE T0.W, T1.Y, T8.X,
+; R600-NEXT:     MUL_IEEE T0.W, T0.Y, T7.X,
 ; R600-NEXT:     SETGT * T1.W, literal.y, KC0[3].W,
 ; R600-NEXT:    23(3.222986e-44), -1036817932(-4.485347e+01)
-; R600-NEXT:     CNDE T1.X, PS, PV.W, 0.0,
-; R600-NEXT:     SETGT T1.Y, KC0[3].W, literal.x,
+; R600-NEXT:     CNDE T4.X, PS, PV.W, 0.0,
+; R600-NEXT:     SETGT T0.Y, KC0[3].W, literal.x,
 ; R600-NEXT:     ADD_INT T0.Z, PV.Z, literal.y,
-; R600-NEXT:     CNDE_INT T0.W, T3.Y, T2.Y, T4.X, BS:VEC_120/SCL_212
-; R600-NEXT:     CNDE * T1.W, T2.X, T3.X, literal.z,
+; R600-NEXT:     CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221
+; R600-NEXT:     CNDE * T1.W, T0.X, T3.X, literal.z,
 ; R600-NEXT:    1109008539(3.853184e+01), 1065353216(1.000000e+00)
 ; R600-NEXT:    2139095040(INF), 0(0.000000e+00)
-; R600-NEXT:     MUL_IEEE T2.X, PV.W, PV.Z,
+; R600-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
 ; R600-NEXT:     SETGT T2.Y, literal.x, KC0[3].Y,
 ; R600-NEXT:     CNDE T1.Z, PV.Y, PV.X, literal.y,
-; R600-NEXT:     CNDE T0.W, T0.X, T0.Y, 0.0,
+; R600-NEXT:     CNDE T0.W, T2.X, T1.X, 0.0,
 ; R600-NEXT:     SETGT * T2.W, KC0[3].Z, literal.z,
 ; R600-NEXT:    -1036817932(-4.485347e+01), 2139095040(INF)
 ; R600-NEXT:    1109008539(3.853184e+01), 0(0.000000e+00)
@@ -2287,8 +2275,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; CM-LABEL: s_exp10_v4f32:
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 97, @6, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 100, @104, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    ALU 36, @205, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 97, @104, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 35, @202, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
@@ -2307,224 +2295,220 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in)
 ; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
 ; CM-NEXT:     MULADD_IEEE T0.X, T0.W, literal.x, PV.W,
 ; CM-NEXT:     ADD T0.Y, T0.Z, -PV.Z,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.Y, literal.x,
-; CM-NEXT:     MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE * T0.W, PV.Y, literal.x,
 ; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
 ; CM-NEXT:     TRUNC T1.X, T1.Z,
-; CM-NEXT:     RNDNE T2.Y, PV.W,
-; CM-NEXT:     MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z,
-; CM-NEXT:     ADD * T1.W, PV.Y, PV.X,
+; CM-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W,
+; CM-NEXT:     RNDNE T1.Z, PV.Z,
+; CM-NEXT:     ADD * T0.W, PV.Y, PV.X,
 ; CM-NEXT:    1079283712(3.321289e+00), 0(0.000000e+00)
+; CM-NEXT:     EXP_IEEE T0.X, T0.W,
+; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
+; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
+; CM-NEXT:     TRUNC T2.X, T1.Z,
+; CM-NEXT:     MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y,
+; CM-NEXT:     FLT_TO_INT T2.Z, T1.X,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    975668412(6.390323e-04), 209715200(1.972152e-31)
+; CM-NEXT:     ADD T1.X, T0.Z, -T1.Z,
+; CM-NEXT:     MUL_IEEE T1.Y, PV.W, literal.x,
+; CM-NEXT:     MAX_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
+; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T0.Z, T2.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T1.W, T2.Z, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
+; CM-NEXT:    102(1.429324e-43), -229(nan)
+; CM-NEXT:     ADD_INT T4.X, T2.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, T2.Z, literal.y,
+; CM-NEXT:     CNDE_INT T0.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T2.W, T2.Z, literal.x,
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     MUL_IEEE T5.X, T0.X, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Z, T2.Z,
+; CM-NEXT:     CNDE_INT T0.Z, PV.Y, PV.X, T3.X,
+; CM-NEXT:     SETGT_INT * T3.W, T2.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 127(1.779649e-43)
+; CM-NEXT:     AND_INT T3.X, KC0[3].Z, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T1.W, T1.Y, T0.W,
+; CM-NEXT:    -4096(nan), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T1.Y, T3.Y, T5.X, PV.Z,
+; CM-NEXT:     LSHL T0.Z, PV.Y, literal.x,
+; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
+; CM-NEXT:    23(3.222986e-44), 1079283712(3.321289e+00)
+; CM-NEXT:     RNDNE T4.X, PV.W,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Z, T3.W, PV.X, PV.Y,
+; CM-NEXT:     ADD * T1.W, T1.X, T0.Y,
+; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
 ; CM-NEXT:     EXP_IEEE T0.X, T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T1.W,
 ; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T1.W,
-; CM-NEXT:     MULADD_IEEE T2.X, T2.W, literal.x, T0.Z,
-; CM-NEXT:     ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212
-; CM-NEXT:     FLT_TO_INT T0.Z, T1.X,
-; CM-NEXT:     MUL_IEEE * T0.W, PV.X, literal.y,
-; CM-NEXT:    975668412(6.390323e-04), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
+; CM-NEXT:     MUL_IEEE T1.X, T0.Z, T2.Y,
+; CM-NEXT:     TRUNC T0.Y, T4.X,
+; CM-NEXT:     FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MUL_IEEE * T1.W, PV.X, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T2.X, PV.W, literal.x,
 ; CM-NEXT:     MUL_IEEE T1.Y, T0.X, literal.y,
 ; CM-NEXT:     MAX_INT T1.Z, PV.Z, literal.z,
-; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.w,
+; CM-NEXT:     MIN_INT * T2.W, PV.Z, literal.w,
 ; CM-NEXT:    209715200(1.972152e-31), 2130706432(1.701412e+38)
 ; CM-NEXT:    -330(nan), 381(5.338947e-43)
-; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T3.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
 ; CM-NEXT:     ADD_INT T1.Z, T0.Z, literal.z,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.Z, literal.w,
+; CM-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; CM-NEXT:    -254(nan), 204(2.858649e-43)
 ; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     ADD_INT T4.X, T0.Z, literal.x,
-; CM-NEXT:     SETGT_UINT T4.Y, T0.Z, literal.y,
+; CM-NEXT:     ADD_INT T6.X, T0.Z, literal.x,
+; CM-NEXT:     SETGT_UINT T3.Y, T0.Z, literal.y,
 ; CM-NEXT:     CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT * T2.W, T0.Z, literal.x,
+; CM-NEXT:     SETGT_INT * T3.W, T0.Z, literal.x,
 ; CM-NEXT:    -127(nan), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, PV.Z, T0.Z,
-; CM-NEXT:     CNDE_INT T3.Y, PV.Y, PV.X, T3.X,
-; CM-NEXT:     SETGT_INT T0.Z, T0.Z, literal.x,
-; CM-NEXT:     MUL_IEEE * T3.W, T1.Y, literal.y,
-; CM-NEXT:    127(1.779649e-43), 2130706432(1.701412e+38)
-; CM-NEXT:     CNDE_INT T3.X, T4.Y, T1.Y, PV.W,
-; CM-NEXT:     AND_INT T1.Y, KC0[3].Z, literal.x,
-; CM-NEXT:     CNDE_INT T1.Z, PV.Z, PV.X, PV.Y,
-; CM-NEXT:     CNDE_INT * T0.W, T1.W, T1.X, T0.W,
-; CM-NEXT:    -4096(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T0.X, T2.W, PV.W, T0.X,
-; CM-NEXT:     LSHL T3.Y, PV.Z, literal.x,
-; CM-NEXT:     TRUNC T1.Z, T2.Y,
-; CM-NEXT:     ADD * T0.W, KC0[3].Z, -PV.Y,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T1.X, PV.W, literal.x,
-; CM-NEXT:     FLT_TO_INT T2.Y, PV.Z,
-; CM-NEXT:     ADD_INT T1.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T1.W, T0.Z, PV.X, T3.X,
-; CM-NEXT:    975668412(6.390323e-04), 1065353216(1.000000e+00)
-; CM-NEXT:     MUL_IEEE T0.X, PV.W, PV.Z,
-; CM-NEXT:     MIN_INT T3.Y, PV.Y, literal.x,
-; CM-NEXT:     MULADD_IEEE T0.Z, T0.W, literal.y, PV.X,
-; CM-NEXT:     ADD * T0.W, T0.Y, T2.X,
-; CM-NEXT:    381(5.338947e-43), 1079283712(3.321289e+00)
-; CM-NEXT:     EXP_IEEE T0.X (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE T0.Y, T0.W,
-; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
-; CM-NEXT:     EXP_IEEE * T0.W (MASKED), T0.W,
-; CM-NEXT:     MULADD_IEEE T1.X, T1.Y, literal.x, T0.Z,
-; CM-NEXT:     MUL_IEEE T4.Y, PV.Y, literal.y,
-; CM-NEXT:     ADD_INT T0.Z, T3.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:     MAX_INT * T0.W, T2.Y, literal.w, BS:VEC_201
-; CM-NEXT:    975668412(6.390323e-04), 2130706432(1.701412e+38)
-; CM-NEXT:    -254(nan), -330(nan)
-; CM-NEXT:     ADD_INT T2.X, T2.Y, literal.x,
-; CM-NEXT:     ADD_INT T3.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T1.Z, T2.Y, literal.z,
-; CM-NEXT:     SETGT_UINT * T0.W, T2.Y, literal.w,
-; CM-NEXT:    -127(nan), 204(2.858649e-43)
-; CM-NEXT:    102(1.429324e-43), -229(nan)
-; CM-NEXT:     SETGT_UINT T3.X, T2.Y, literal.x,
-; CM-NEXT:     CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T1.Z, T2.Y, literal.y,
-; CM-NEXT:     MUL_IEEE * T1.W, T0.Y, literal.z, BS:VEC_120/SCL_212
-; CM-NEXT:    254(3.559298e-43), -127(nan)
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T4.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT * T3.Y, PV.Z, PV.Y, T2.Y,
-; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 104:
-; CM-NEXT:     CNDE_INT T0.Z, T3.X, T2.X, T0.Z,
-; CM-NEXT:     SETGT_INT * T2.W, T2.Y, literal.x,
+; CM-NEXT:     CNDE_INT T7.X, PV.W, PV.Z, T0.Z,
+; CM-NEXT:     CNDE_INT T2.Y, PV.Y, PV.X, T5.X,
+; CM-NEXT:     SETGT_INT * T0.Z, T0.Z, literal.x,
 ; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T2.X, T1.Y, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, T3.Y, PV.Z,
-; CM-NEXT:     CNDE_INT T0.Z, T0.W, T4.X, T1.W,
-; CM-NEXT:     MUL_IEEE * T0.W, T4.Y, literal.y, BS:VEC_201
-; CM-NEXT:    1079283712(3.321289e+00), 2130706432(1.701412e+38)
-; CM-NEXT:     AND_INT T4.X, KC0[4].X, literal.x,
-; CM-NEXT:     CNDE_INT T2.Y, T3.X, T4.Y, PV.W,
-; CM-NEXT:     CNDE_INT T0.Z, T1.Z, PV.Z, T0.Y,
-; CM-NEXT:     LSHL * T0.W, PV.Y, literal.y,
-; CM-NEXT:    -4096(nan), 23(3.222986e-44)
-; CM-NEXT:     ADD_INT T3.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T2.W, PV.Z, PV.Y,
-; CM-NEXT:     MUL_IEEE T0.Z, PV.X, literal.y,
-; CM-NEXT:     RNDNE * T0.W, T2.X,
-; CM-NEXT:    1065353216(1.000000e+00), 1079283712(3.321289e+00)
-; CM-NEXT:     ADD T2.X, T2.X, -PV.W,
-; CM-NEXT:     RNDNE T1.Y, PV.Z,
-; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, PV.X,
-; CM-NEXT:     SETGT * T1.W, literal.x, KC0[3].W,
-; CM-NEXT:    -1036817932(-4.485347e+01), 0(0.000000e+00)
-; CM-NEXT:     CNDE T3.X, PV.W, PV.Z, 0.0,
-; CM-NEXT:     TRUNC T0.Y, T0.W,
-; CM-NEXT:     TRUNC T1.Z, PV.Y,
-; CM-NEXT:     ADD * T0.W, PV.X, T1.X,
+; CM-NEXT:    ALU clause starting at 104:
+; CM-NEXT:     ADD * T4.W, KC0[3].Z, -T3.X,
+; CM-NEXT:     MUL_IEEE T5.X, PV.W, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, T0.Z, T7.X, T2.Y,
+; CM-NEXT:     MUL_IEEE T1.Z, T1.Y, literal.y,
+; CM-NEXT:     CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122
+; CM-NEXT:    975668412(6.390323e-04), 2130706432(1.701412e+38)
+; CM-NEXT:     CNDE_INT T0.X, T3.W, PV.W, T0.X,
+; CM-NEXT:     CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z,
+; CM-NEXT:     LSHL T1.Z, PV.Y, literal.x,
+; CM-NEXT:     MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212
+; CM-NEXT:    23(3.222986e-44), 1079283712(3.321289e+00)
+; CM-NEXT:     MULADD_IEEE T2.X, T3.X, literal.x, PV.W,
+; CM-NEXT:     ADD T2.Y, T0.W, -T4.X,
+; CM-NEXT:     ADD_INT T1.Z, PV.Z, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T0.Z, PV.X, PV.Y,
+; CM-NEXT:    975668412(6.390323e-04), 1065353216(1.000000e+00)
+; CM-NEXT:     AND_INT T0.X, KC0[4].X, literal.x,
+; CM-NEXT:     MUL_IEEE T1.Y, PV.W, PV.Z,
+; CM-NEXT:     SETGT T0.Z, literal.y, KC0[3].W,
+; CM-NEXT:     ADD * T0.W, PV.Y, PV.X,
+; CM-NEXT:    -4096(nan), -1036817932(-4.485347e+01)
 ; CM-NEXT:     EXP_IEEE T0.X (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE T0.Y (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE T0.Z (MASKED), T0.W,
 ; CM-NEXT:     EXP_IEEE * T0.W, T0.W,
-; CM-NEXT:     FLT_TO_INT T1.X, T1.Z,
-; CM-NEXT:     FLT_TO_INT T0.Y, T0.Y,
-; CM-NEXT:     MUL_IEEE T1.Z, PV.W, literal.x,
-; CM-NEXT:     ADD * T1.W, KC0[4].X, -T4.X,
-; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     MUL_IEEE T2.X, PV.W, literal.x,
-; CM-NEXT:     MUL_IEEE T2.Y, T0.W, literal.y,
-; CM-NEXT:     MUL_IEEE T2.Z, PV.Z, literal.z,
-; CM-NEXT:     SETGT_UINT * T2.W, PV.Y, literal.w,
-; CM-NEXT:    975668412(6.390323e-04), 209715200(1.972152e-31)
-; CM-NEXT:    2130706432(1.701412e+38), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T5.X, PV.W, T1.Z, PV.Z,
-; CM-NEXT:     MUL_IEEE T3.Y, PV.Y, literal.x,
-; CM-NEXT:     MULADD_IEEE T1.Z, T1.W, literal.y, PV.X,
-; CM-NEXT:     MAX_INT * T1.W, T1.X, literal.z,
-; CM-NEXT:    209715200(1.972152e-31), 1079283712(3.321289e+00)
-; CM-NEXT:    -330(nan), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T4.Y, T1.X, literal.y,
-; CM-NEXT:     MULADD_IEEE T1.Z, T4.X, literal.z, PV.Z, BS:VEC_120/SCL_212
-; CM-NEXT:     MAX_INT * T1.W, T0.Y, literal.w,
-; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
+; CM-NEXT:     CNDE T2.X, T0.Z, T1.Y, 0.0,
+; CM-NEXT:     ADD T1.Y, KC0[4].X, -T0.X,
+; CM-NEXT:     FLT_TO_INT T0.Z, T0.Y,
+; CM-NEXT:     MUL_IEEE * T1.W, PV.W, literal.x,
+; CM-NEXT:    209715200(1.972152e-31), 0(0.000000e+00)
+; CM-NEXT:     MUL_IEEE T3.X, PV.W, literal.x,
+; CM-NEXT:     SETGT_UINT T0.Y, PV.Z, literal.y,
+; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, literal.z,
+; CM-NEXT:     MUL_IEEE * T2.W, T0.X, literal.w,
+; CM-NEXT:    209715200(1.972152e-31), -229(nan)
+; CM-NEXT:    975668412(6.390323e-04), 1079283712(3.321289e+00)
+; CM-NEXT:     RNDNE T4.X, PV.W,
+; CM-NEXT:     MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z,
+; CM-NEXT:     CNDE_INT T1.Z, PV.Y, PV.X, T1.W,
+; CM-NEXT:     SETGT_INT * T1.W, T0.Z, literal.y,
+; CM-NEXT:    1079283712(3.321289e+00), -127(nan)
+; CM-NEXT:     CNDE_INT T3.X, PV.W, PV.Z, T0.W,
+; CM-NEXT:     MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y,
+; CM-NEXT:     ADD T1.Z, T2.W, -PV.X,
+; CM-NEXT:     MAX_INT * T2.W, T0.Z, literal.y,
 ; CM-NEXT:    975668412(6.390323e-04), -330(nan)
-; CM-NEXT:     ADD T4.X, T0.Z, -T1.Y,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT:     SETGT_UINT * T1.W, T0.Y, literal.z,
+; CM-NEXT:     ADD_INT T0.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, T0.Z, literal.y,
+; CM-NEXT:     TRUNC T2.Z, T4.X,
+; CM-NEXT:     ADD * T2.W, PV.Z, PV.Y,
 ; CM-NEXT:    204(2.858649e-43), 102(1.429324e-43)
-; CM-NEXT:    -229(nan), 0(0.000000e+00)
-; CM-NEXT:     SETGT_UINT T6.X, T1.X, literal.x,
-; CM-NEXT:     CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
-; CM-NEXT:     SETGT_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT:     ADD * T3.W, PV.X, T1.Z,
-; CM-NEXT:    -229(nan), -127(nan)
-; CM-NEXT:     EXP_IEEE T1.X (MASKED), T3.W,
-; CM-NEXT:     EXP_IEEE T1.Y (MASKED), T3.W,
-; CM-NEXT:     EXP_IEEE T1.Z, T3.W,
-; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T3.W,
-; CM-NEXT:     CNDE_INT T4.X, T0.Z, T1.Y, T0.Y,
-; CM-NEXT:     CNDE_INT T1.Y, T6.X, T2.X, T4.Y, BS:VEC_120/SCL_212
-; CM-NEXT:     SETGT_INT T2.Z, T1.X, literal.x,
-; CM-NEXT:     MUL_IEEE * T3.W, PV.Z, literal.y,
-; CM-NEXT:    -127(nan), 209715200(1.972152e-31)
-; CM-NEXT:     MUL_IEEE T2.X, T1.Z, literal.x,
-; CM-NEXT:     MUL_IEEE T4.Y, PV.W, literal.y,
-; CM-NEXT:     CNDE_INT T3.Z, PV.Z, PV.Y, T1.X,
-; CM-NEXT:     MIN_INT * T4.W, T1.X, literal.z,
+; CM-NEXT:     EXP_IEEE T1.X (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE T1.Y, T2.W,
+; CM-NEXT:     EXP_IEEE T1.Z (MASKED), T2.W,
+; CM-NEXT:     EXP_IEEE * T1.W (MASKED), T2.W,
+; CM-NEXT:     MUL_IEEE T4.X, T0.W, literal.x,
+; CM-NEXT:     FLT_TO_INT T3.Y, T2.Z,
+; CM-NEXT:     MUL_IEEE T1.Z, PV.Y, literal.y,
+; CM-NEXT:     CNDE_INT * T0.W, T0.Y, T0.X, T2.Y,
 ; CM-NEXT:    2130706432(1.701412e+38), 209715200(1.972152e-31)
+; CM-NEXT:     CNDE_INT T0.X, T1.W, PV.W, T0.Z,
+; CM-NEXT:     MUL_IEEE T0.Y, PV.Z, literal.x,
+; CM-NEXT:     MAX_INT T2.Z, PV.Y, literal.y,
+; CM-NEXT:     MIN_INT * T0.W, PV.Y, literal.z,
+; CM-NEXT:    209715200(1.972152e-31), -330(nan)
 ; CM-NEXT:    381(5.338947e-43), 0(0.000000e+00)
-; CM-NEXT:     MIN_INT T7.X, T0.Y, literal.x,
-; CM-NEXT:     ADD_INT T1.Y, PV.W, literal.y,
-; CM-NEXT:     ADD_INT T4.Z, T1.X, literal.z,
-; CM-NEXT:     SETGT_UINT * T4.W, T1.X, literal.w,
-; CM-NEXT:    381(5.338947e-43), -254(nan)
+; CM-NEXT:     ADD_INT T5.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT T2.Y, PV.Z, literal.y,
+; CM-NEXT:     ADD_INT T2.Z, T3.Y, literal.z,
+; CM-NEXT:     SETGT_UINT * T0.W, T3.Y, literal.w,
+; CM-NEXT:    -254(nan), 204(2.858649e-43)
+; CM-NEXT:    102(1.429324e-43), -229(nan)
+; CM-NEXT:     ADD_INT T6.X, T3.Y, literal.x,
+; CM-NEXT:     SETGT_UINT T4.Y, T3.Y, literal.y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.W, PV.Y, PV.Z,
+; CM-NEXT:     SETGT_INT * T1.W, T3.Y, literal.x,
 ; CM-NEXT:    -127(nan), 254(3.559298e-43)
-; CM-NEXT:     CNDE_INT T8.X, PV.W, PV.Z, PV.Y,
-; CM-NEXT:     SETGT_INT T1.Y, T1.X, literal.x,
-; CM-NEXT:     ADD_INT T4.Z, PV.X, literal.y,
-; CM-NEXT:     ADD_INT * T5.W, T0.Y, literal.z,
+; CM-NEXT:     MUL_IEEE T7.X, T1.Y, literal.x,
+; CM-NEXT:     CNDE_INT T2.Y, PV.W, PV.Z, T3.Y,
+; CM-NEXT:     CNDE_INT T2.Z, PV.Y, PV.X, T5.X,
+; CM-NEXT:     MIN_INT * T2.W, T0.Z, literal.y,
+; CM-NEXT:    2130706432(1.701412e+38), 381(5.338947e-43)
+; CM-NEXT:     SETGT_INT T5.X, T3.Y, literal.x,
+; CM-NEXT:     ADD_INT T3.Y, PV.W, literal.y,
+; CM-NEXT:     ADD_INT T3.Z, T0.Z, literal.z,
+; CM-NEXT:     SETGT_UINT * T2.W, T0.Z, literal.w,
 ; CM-NEXT:    127(1.779649e-43), -254(nan)
-; CM-NEXT:    -127(nan), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT T1.X, T2.W, PV.W, PV.Z,
-; CM-NEXT:     CNDE_INT T5.Y, PV.Y, T3.Z, PV.X,
-; CM-NEXT:     CNDE_INT T3.Z, T6.X, T4.Y, T3.W,
-; CM-NEXT:     MUL_IEEE * T2.W, T2.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    -127(nan), 254(3.559298e-43)
+; CM-NEXT:     CNDE_INT T6.X, PV.W, PV.Z, PV.Y,
+; CM-NEXT:     CNDE_INT T2.Y, PV.X, T2.Y, T2.Z,
+; CM-NEXT:     MUL_IEEE T2.Z, T7.X, literal.x,
+; CM-NEXT:     CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122
 ; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
-; CM-NEXT:     SETGT_INT T6.X, T0.Y, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T4.W, T2.X, PV.W,
-; CM-NEXT:     CNDE_INT * T1.Z, T2.Z, PV.Z, T1.Z,
-; CM-NEXT:    127(1.779649e-43), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 205:
-; CM-NEXT:     LSHL * T2.W, T5.Y, literal.x,
-; CM-NEXT:    23(3.222986e-44), 0(0.000000e+00)
-; CM-NEXT:     ADD_INT T2.X, PV.W, literal.x,
-; CM-NEXT:     CNDE_INT T0.Y, T1.Y, T1.Z, T0.Y,
-; CM-NEXT:     CNDE_INT * T1.Z, T6.X, T4.X, T1.X,
+; CM-NEXT:     SETGT_INT T8.X, T0.Z, literal.x,
+; CM-NEXT:     CNDE_INT T0.Y, T1.W, PV.W, T1.Y,
+; CM-NEXT:     CNDE_INT T0.Z, T4.Y, T7.X, PV.Z,
+; CM-NEXT:     LSHL * T0.W, PV.Y, literal.y,
+; CM-NEXT:    127(1.779649e-43), 23(3.222986e-44)
+; CM-NEXT:    ALU clause starting at 202:
+; CM-NEXT:     ADD_INT T7.X, T0.W, literal.x,
+; CM-NEXT:     CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z,
 ; CM-NEXT:    1065353216(1.000000e+00), 0(0.000000e+00)
-; CM-NEXT:     CNDE_INT * T1.W, T1.W, T3.Y, T2.Y,
-; CM-NEXT:     CNDE_INT T1.X, T0.Z, PV.W, T0.W,
-; CM-NEXT:     LSHL T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:     MUL_IEEE T0.Z, T0.Y, T2.X,
+; CM-NEXT:     CNDE_INT * T0.Z, T8.X, T0.X, T6.X,
+; CM-NEXT:     MUL_IEEE * T0.W, T4.X, literal.x,
+; CM-NEXT:    2130706432(1.701412e+38), 0(0.000000e+00)
+; CM-NEXT:     CNDE_INT T0.X, T2.W, T4.X, PV.W,
+; CM-NEXT:     LSHL T1.Y, T0.Z, literal.x,
+; CM-NEXT:     MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122
 ; CM-NEXT:     SETGT * T0.W, literal.y, KC0[4].X,
 ; CM-NEXT:    23(3.222986e-44), -1036817932(-4.485347e+01)
-; CM-NEXT:     CNDE T2.X, PV.W, PV.Z, 0.0,
+; CM-NEXT:     CNDE T4.X, PV.W, PV.Z, 0.0,
 ; CM-NEXT:     SETGT T0.Y, KC0[4].X, literal.x,
 ; CM-NEXT:     ADD_INT T0.Z, PV.Y, literal.y,
-; CM-NEXT:     CNDE_INT * T0.W, T6.X, PV.X, T5.X,
+; CM-NEXT:     CNDE_INT * T0.W, T8.X, T3.X, PV.X,
 ; CM-NEXT:    1109008539(3.853184e+01), 1065353216(1.000000e+00)
-; CM-NEXT:     SETGT T1.X, KC0[3].W, literal.x,
+; CM-NEXT:     SETGT T0.X, KC0[3].W, literal.x,
 ; CM-NEXT:     MUL_IEEE T1.Y, PV.W, PV.Z,
 ; CM-NEXT:     SETGT T0.Z, literal.y, KC0[3].Z,
 ; CM-NEXT:     CNDE * T0.W, PV.Y, PV.X, literal.z,
 ; CM-NEXT:    1109008539(3.853184e+01), -1036817932(-4.485347e+01)
 ; CM-NEXT:    2139095040(INF), 0(0.000000e+00)
-; CM-NEXT:     SETGT T2.X, literal.x, KC0[3].Y,
+; CM-NEXT:     SETGT T3.X, literal.x, KC0[3].Y,
 ; CM-NEXT:     CNDE T0.Y, PV.Z, PV.Y, 0.0,
-; CM-NEXT:     CNDE T0.Z, PV.X, T3.X, literal.y,
+; CM-NEXT:     CNDE T0.Z, PV.X, T2.X, literal.y,
 ; CM-NEXT:     SETGT * T1.W, KC0[3].Z, literal.z,
 ; CM-NEXT:    -1036817932(-4.485347e+01), 2139095040(INF)
 ; CM-NEXT:    1109008539(3.853184e+01), 0(0.000000e+00)
 ; CM-NEXT:     CNDE T0.Y, PV.W, PV.Y, literal.x,
-; CM-NEXT:     CNDE T1.Z, PV.X, T0.X, 0.0,
+; CM-NEXT:     CNDE T1.Z, PV.X, T1.X, 0.0,
 ; CM-NEXT:     SETGT * T1.W, KC0[3].Y, literal.y,
 ; CM-NEXT:    2139095040(INF), 1109008539(3.853184e+01)
 ; CM-NEXT:     CNDE * T0.X, PV.W, PV.Z, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index b1a82daa8e7db..b3f4790df4d48 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -795,17 +795,17 @@ define amdgpu_kernel void @shl_i64(ptr addrspace(1) %out, ptr addrspace(1) %in)
 ; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     AND_INT T1.Y, T0.Z, literal.x,
-; EG-NEXT:     LSHR T1.Z, T0.Y, 1,
+; EG-NEXT:     LSHR T1.Y, T0.Y, 1,
+; EG-NEXT:     NOT_INT T1.Z, T0.Z,
 ; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
-; EG-NEXT:     NOT_INT * T1.W, T0.Z,
+; EG-NEXT:     AND_INT * T1.W, T0.Z, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL T0.W, T0.X, PV.Y,
+; EG-NEXT:     LSHL T2.Z, T0.X, PS,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
 ; EG-NEXT:     AND_INT * T1.W, T0.Z, literal.x,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T2.Z, 0.0,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
@@ -858,8 +858,8 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 22, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
+; EG-NEXT:    ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
@@ -868,27 +868,28 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; EG-NEXT:    ALU clause starting at 10:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     AND_INT T1.Y, T1.Z, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T1.Z, literal.x,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL T2.X, T0.Z, PV.W,
+; EG-NEXT:     AND_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:     LSHR T2.Z, T0.W, 1,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1, BS:VEC_102/SCL_221
 ; EG-NEXT:     NOT_INT * T1.W, T1.Z,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T3.X, PV.Z, PV.W, PS,
+; EG-NEXT:     LSHR T2.Y, T0.Y, 1,
+; EG-NEXT:     NOT_INT T0.Z, T1.X,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
+; EG-NEXT:     AND_INT * T1.W, T1.X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL * T1.W, T0.Z, PV.Y,
-; EG-NEXT:     AND_INT T2.X, T1.Z, literal.x,
-; EG-NEXT:     AND_INT T1.Y, T1.X, literal.y,
-; EG-NEXT:     LSHR T0.Z, T0.Y, 1,
-; EG-NEXT:     BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
-; EG-NEXT:     NOT_INT * T3.W, T1.X,
-; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
-; EG-NEXT:     BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL T0.Z, T0.X, PV.Y,
-; EG-NEXT:     AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     CNDE_INT * T3.W, PV.X, T0.W, T1.W,
+; EG-NEXT:     LSHL T0.Y, T0.X, PS, BS:VEC_120/SCL_212
+; EG-NEXT:     AND_INT T1.Z, T1.X, literal.x, BS:VEC_201
+; EG-NEXT:     BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
+; EG-NEXT:     CNDE_INT * T2.W, T1.Y, PV.X, T2.X,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
-; EG-NEXT:     CNDE_INT * T3.Z, T2.X, T1.W, 0.0,
-; EG-NEXT:     CNDE_INT T3.X, T2.W, T0.Z, 0.0,
+; EG-NEXT:     CNDE_INT T2.Y, PV.Z, PV.W, PV.Y,
+; EG-NEXT:     CNDE_INT * T2.Z, T1.Y, T2.X, 0.0,
+; EG-NEXT:     CNDE_INT T2.X, T1.Z, T0.Y, 0.0,
 ; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
@@ -955,65 +956,66 @@ define amdgpu_kernel void @shl_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @6
-; EG-NEXT:    ALU 47, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1
+; EG-NEXT:    ALU 48, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 48, #1
-; EG-NEXT:     VTX_READ_128 T2.XYZW, T0.X, 0, #1
-; EG-NEXT:     VTX_READ_128 T3.XYZW, T0.X, 32, #1
-; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 32, #1
+; EG-NEXT:     VTX_READ_128 T2.XYZW, T0.X, 48, #1
+; EG-NEXT:     VTX_READ_128 T3.XYZW, T0.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 14:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 15:
-; EG-NEXT:     AND_INT T4.Z, T1.Z, literal.x,
-; EG-NEXT:     LSHR T1.W, T0.W, 1,
-; EG-NEXT:     NOT_INT * T3.W, T1.Z,
+; EG-NEXT:     AND_INT * T1.W, T1.Z, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1,
-; EG-NEXT:     AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201
-; EG-NEXT:     LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212
-; EG-NEXT:     BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221
-; EG-NEXT:     NOT_INT * T2.W, T3.Z,
-; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL T2.Z, T2.Z, PV.Y,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W,
-; EG-NEXT:     LSHL * T1.W, T0.Z, T4.Z,
+; EG-NEXT:     LSHL * T1.W, T0.Z, PV.W,
 ; EG-NEXT:     AND_INT T4.X, T1.Z, literal.x,
-; EG-NEXT:     AND_INT T1.Y, T1.X, literal.y,
-; EG-NEXT:     LSHR T0.Z, T0.Y, 1,
-; EG-NEXT:     BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
-; EG-NEXT:     NOT_INT * T3.W, T1.X,
+; EG-NEXT:     LSHR T1.Y, T3.W, 1,
+; EG-NEXT:     NOT_INT T4.Z, T2.Z, BS:VEC_201
+; EG-NEXT:     BIT_ALIGN_INT T2.W, T3.W, T3.Z, 1,
+; EG-NEXT:     AND_INT * T3.W, T2.Z, literal.y,
 ; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
-; EG-NEXT:     AND_INT T5.X, T3.Z, literal.x,
-; EG-NEXT:     BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL T0.Z, T0.X, PV.Y,
-; EG-NEXT:     AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     CNDE_INT * T4.W, PV.X, T0.W, T1.W,
+; EG-NEXT:     LSHL T5.X, T3.Z, PS,
+; EG-NEXT:     AND_INT T2.Y, T2.Z, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     BIT_ALIGN_INT T2.Z, PV.Y, PV.W, PV.Z,
+; EG-NEXT:     LSHR T2.W, T3.Y, 1,
+; EG-NEXT:     NOT_INT * T3.W, T2.X,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T6.X, T3.Y, T3.X, 1,
+; EG-NEXT:     AND_INT T1.Y, T2.X, literal.x,
+; EG-NEXT:     LSHR T3.Z, T0.W, 1,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
+; EG-NEXT:     NOT_INT * T4.W, T1.Z,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T7.X, PV.Z, PV.W, PS,
+; EG-NEXT:     LSHL T1.Y, T3.X, PV.Y, BS:VEC_120/SCL_212
+; EG-NEXT:     AND_INT T0.Z, T2.X, literal.x, BS:VEC_201
+; EG-NEXT:     BIT_ALIGN_INT T0.W, T2.W, PV.X, T3.W,
+; EG-NEXT:     CNDE_INT * T3.W, T2.Y, T2.Z, T5.X,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T0.X, T3.X, literal.x,
-; EG-NEXT:     CNDE_INT T4.Y, PV.W, PV.Y, PV.Z,
-; EG-NEXT:     LSHR T1.Z, T2.Y, 1,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1,
-; EG-NEXT:     NOT_INT * T3.W, T3.X,
+; EG-NEXT:     LSHR T2.X, T0.Y, 1,
+; EG-NEXT:     CNDE_INT T3.Y, PV.Z, PV.W, PV.Y,
+; EG-NEXT:     NOT_INT T1.Z, T1.X,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
+; EG-NEXT:     AND_INT * T2.W, T1.X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS,
-; EG-NEXT:     LSHL T0.Y, T2.X, PV.X,
-; EG-NEXT:     CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212
-; EG-NEXT:     AND_INT * T0.W, T3.X, literal.x, BS:VEC_201
+; EG-NEXT:     LSHL T0.X, T0.X, PS,
+; EG-NEXT:     AND_INT T0.Y, T1.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     CNDE_INT T3.Z, T2.Y, T5.X, 0.0, BS:VEC_021/SCL_122
+; EG-NEXT:     BIT_ALIGN_INT * T0.W, PV.X, PV.W, PV.Z,
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT * T1.W, T5.X, T3.Y, T2.Z,
-; EG-NEXT:     CNDE_INT T4.X, T2.W, T0.Z, 0.0,
-; EG-NEXT:     CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212
-; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.x,
+; EG-NEXT:     CNDE_INT * T2.W, T4.X, T7.X, T1.W,
+; EG-NEXT:     CNDE_INT T3.X, T0.Z, T1.Y, 0.0,
+; EG-NEXT:     CNDE_INT T2.Y, T0.Y, T0.W, T0.X,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T0.X, PV.W, literal.x,
-; EG-NEXT:     CNDE_INT T1.Z, T5.X, T2.Z, 0.0,
-; EG-NEXT:     CNDE_INT * T1.X, T0.W, T0.Y, 0.0,
+; EG-NEXT:     LSHR T1.X, PV.W, literal.x,
+; EG-NEXT:     CNDE_INT T2.Z, T4.X, T1.W, 0.0,
+; EG-NEXT:     CNDE_INT * T2.X, T0.Y, T0.X, 0.0,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
   %a = load <4 x i64>, ptr addrspace(1) %in
@@ -1172,17 +1174,17 @@ define amdgpu_kernel void @s_shl_constant_i64(ptr addrspace(1) %out, i64 %a) {
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
-; EG-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT:     MOV T0.W, literal.y,
-; EG-NEXT:     NOT_INT * T1.W, KC0[2].W,
-; EG-NEXT:    31(4.344025e-44), -1(nan)
-; EG-NEXT:     BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
-; EG-NEXT:     LSHL T0.W, literal.y, PV.Z,
+; EG-NEXT:     MOV T0.Z, literal.x,
+; EG-NEXT:     NOT_INT T0.W, KC0[2].W,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.y,
+; EG-NEXT:    -1(nan), 31(4.344025e-44)
+; EG-NEXT:     LSHL T1.Z, literal.x, PS,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
 ; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT:    32767(4.591635e-41), -1(nan)
+; EG-NEXT:    -1(nan), 32767(4.591635e-41)
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T1.Z, 0.0,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %shl = shl i64 281474976710655, %a
@@ -1423,15 +1425,15 @@ define amdgpu_kernel void @s_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr ad
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
-; EG-NEXT:     NOT_INT T0.W, KC0[2].W,
-; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.x,
+; EG-NEXT:     AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT:     NOT_INT * T1.W, KC0[2].W,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHL T0.Z, literal.x, PS,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
-; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.y,
-; EG-NEXT:    64(8.968310e-44), 32(4.484155e-44)
-; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.W, PV.Z,
-; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.Z, 0.0,
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
+; EG-NEXT:     AND_INT T1.W, KC0[2].W, literal.x,
+; EG-NEXT:     LSHL * T0.W, literal.y, PV.W,
+; EG-NEXT:    32(4.484155e-44), 64(8.968310e-44)
+; EG-NEXT:     CNDE_INT * T0.Y, PV.W, PV.Z, PS,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %shl = shl i64 64, %a
@@ -1903,16 +1905,16 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, p
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
-; EG-NEXT:     NOT_INT T0.W, KC0[2].W,
-; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.x,
+; EG-NEXT:     AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT:     NOT_INT * T1.W, KC0[2].W,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHL T0.Z, literal.x, PS,
-; EG-NEXT:     BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
-; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT:    1082130432(4.000000e+00), 541065216(1.626303e-19)
-; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.W, PV.Z,
-; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.Z, 0.0,
+; EG-NEXT:     BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
+; EG-NEXT:     AND_INT T1.W, KC0[2].W, literal.y,
+; EG-NEXT:     LSHL * T0.W, literal.z, PV.W,
+; EG-NEXT:    541065216(1.626303e-19), 32(4.484155e-44)
+; EG-NEXT:    1082130432(4.000000e+00), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT * T0.Y, PV.W, PV.Z, PS,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %shl = shl i64 1082130432, %a
@@ -1959,17 +1961,17 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %ou
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    ALU clause starting at 4:
-; EG-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
-; EG-NEXT:     MOV T0.W, literal.y,
-; EG-NEXT:     NOT_INT * T1.W, KC0[2].W,
-; EG-NEXT:    31(4.344025e-44), -532676608(-5.534023e+19)
-; EG-NEXT:     BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
-; EG-NEXT:     LSHL T0.W, literal.y, PV.Z,
+; EG-NEXT:     MOV T0.Z, literal.x,
+; EG-NEXT:     NOT_INT T0.W, KC0[2].W,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.y,
+; EG-NEXT:    -532676608(-5.534023e+19), 31(4.344025e-44)
+; EG-NEXT:     LSHL T1.Z, literal.x, PS,
+; EG-NEXT:     BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
 ; EG-NEXT:     AND_INT * T1.W, KC0[2].W, literal.z,
-; EG-NEXT:    2147483647(nan), -1065353216(-4.000000e+00)
+; EG-NEXT:    -1065353216(-4.000000e+00), 2147483647(nan)
 ; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.Z, PV.W,
-; EG-NEXT:     CNDE_INT T0.X, T1.W, T0.W, 0.0,
+; EG-NEXT:     CNDE_INT * T0.Y, PS, PV.W, PV.Z,
+; EG-NEXT:     CNDE_INT T0.X, T1.W, T1.Z, 0.0,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %shl = shl i64 -1065353216, %a
diff --git a/llvm/test/CodeGen/X86/misched-critical-path.ll b/llvm/test/CodeGen/X86/misched-critical-path.ll
new file mode 100644
index 0000000000000..909692aca2b0a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/misched-critical-path.ll
@@ -0,0 +1,240 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+@sc = common global i8 0
+@uc = common global i8 0
+@ss = common global i16 0
+@us = common global i16 0
+@si = common global i32 0
+@ui = common global i32 0
+@sl = common global i64 0
+@ul = common global i64 0
+@sll = common global i64 0
+@ull = common global i64 0
+
+; Regression Test for PR92368.
+;
+; CHECK: SU(75):   CMP8rr %49:gr8, %48:gr8, implicit-def $eflags
+; CHECK:   Predecessors:
+; CHECK-NEXT:    SU(73): Data Latency=0 Reg=%49
+; CHECK-NEXT:    SU(74): Out  Latency=0
+; CHECK-NEXT:    SU(72): Out  Latency=0
+; CHECK-NEXT:    SU(70): Data Latency=4 Reg=%48
+define void @misched_bug() nounwind {
+entry:
+  %0 = load i8, i8* @sc, align 1
+  %1 = zext i8 %0 to i32
+  %2 = load i8, i8* @uc, align 1
+  %3 = zext i8 %2 to i32
+  %4 = trunc i32 %3 to i8
+  %5 = trunc i32 %1 to i8
+  %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
+  %6 = extractvalue { i8, i1 } %pair6, 0
+  store i8 %6, i8* @sc, align 1
+  %7 = load i8, i8* @sc, align 1
+  %8 = zext i8 %7 to i32
+  %9 = load i8, i8* @uc, align 1
+  %10 = zext i8 %9 to i32
+  %11 = trunc i32 %10 to i8
+  %12 = trunc i32 %8 to i8
+  %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
+  %13 = extractvalue { i8, i1 } %pair13, 0
+  store i8 %13, i8* @uc, align 1
+  %14 = load i8, i8* @sc, align 1
+  %15 = sext i8 %14 to i16
+  %16 = zext i16 %15 to i32
+  %17 = load i8, i8* @uc, align 1
+  %18 = zext i8 %17 to i32
+  %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
+  %20 = trunc i32 %18 to i16
+  %21 = trunc i32 %16 to i16
+  %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
+  %22 = extractvalue { i16, i1 } %pair22, 0
+  store i16 %22, i16* @ss, align 2
+  %23 = load i8, i8* @sc, align 1
+  %24 = sext i8 %23 to i16
+  %25 = zext i16 %24 to i32
+  %26 = load i8, i8* @uc, align 1
+  %27 = zext i8 %26 to i32
+  %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
+  %29 = trunc i32 %27 to i16
+  %30 = trunc i32 %25 to i16
+  %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
+  %31 = extractvalue { i16, i1 } %pair31, 0
+  store i16 %31, i16* @us, align 2
+  %32 = load i8, i8* @sc, align 1
+  %33 = sext i8 %32 to i32
+  %34 = load i8, i8* @uc, align 1
+  %35 = zext i8 %34 to i32
+  %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
+  %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
+  %37 = extractvalue { i32, i1 } %pair37, 0
+  store i32 %37, i32* @si, align 4
+  %38 = load i8, i8* @sc, align 1
+  %39 = sext i8 %38 to i32
+  %40 = load i8, i8* @uc, align 1
+  %41 = zext i8 %40 to i32
+  %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
+  %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
+  %43 = extractvalue { i32, i1 } %pair43, 0
+  store i32 %43, i32* @ui, align 4
+  %44 = load i8, i8* @sc, align 1
+  %45 = sext i8 %44 to i64
+  %46 = load i8, i8* @uc, align 1
+  %47 = zext i8 %46 to i64
+  %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
+  %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
+  %49 = extractvalue { i64, i1 } %pair49, 0
+  store i64 %49, i64* @sl, align 8
+  %50 = load i8, i8* @sc, align 1
+  %51 = sext i8 %50 to i64
+  %52 = load i8, i8* @uc, align 1
+  %53 = zext i8 %52 to i64
+  %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
+  %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
+  %55 = extractvalue { i64, i1 } %pair55, 0
+  store i64 %55, i64* @ul, align 8
+  %56 = load i8, i8* @sc, align 1
+  %57 = sext i8 %56 to i64
+  %58 = load i8, i8* @uc, align 1
+  %59 = zext i8 %58 to i64
+  %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
+  %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
+  %61 = extractvalue { i64, i1 } %pair61, 0
+  store i64 %61, i64* @sll, align 8
+  %62 = load i8, i8* @sc, align 1
+  %63 = sext i8 %62 to i64
+  %64 = load i8, i8* @uc, align 1
+  %65 = zext i8 %64 to i64
+  %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
+  %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
+  %67 = extractvalue { i64, i1 } %pair67, 0
+  store i64 %67, i64* @ull, align 8
+  %68 = load i8, i8* @sc, align 1
+  %69 = zext i8 %68 to i32
+  %70 = load i8, i8* @uc, align 1
+  %71 = zext i8 %70 to i32
+  %72 = trunc i32 %71 to i8
+  %73 = trunc i32 %69 to i8
+  %pair74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
+  %74 = extractvalue { i8, i1 } %pair74, 0
+  %75 = icmp eq i8 %74, %72
+  %76 = zext i1 %75 to i8
+  %77 = zext i8 %76 to i32
+  store i32 %77, i32* @ui, align 4
+  %78 = load i8, i8* @sc, align 1
+  %79 = zext i8 %78 to i32
+  %80 = load i8, i8* @uc, align 1
+  %81 = zext i8 %80 to i32
+  %82 = trunc i32 %81 to i8
+  %83 = trunc i32 %79 to i8
+  %pair84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
+  %84 = extractvalue { i8, i1 } %pair84, 0
+  %85 = icmp eq i8 %84, %82
+  %86 = zext i1 %85 to i8
+  %87 = zext i8 %86 to i32
+  store i32 %87, i32* @ui, align 4
+  %88 = load i8, i8* @sc, align 1
+  %89 = sext i8 %88 to i16
+  %90 = zext i16 %89 to i32
+  %91 = load i8, i8* @uc, align 1
+  %92 = zext i8 %91 to i32
+  %93 = trunc i32 %92 to i8
+  %94 = trunc i32 %90 to i8
+  %pair95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
+  %95 = extractvalue { i8, i1 } %pair95, 0
+  %96 = icmp eq i8 %95, %93
+  %97 = zext i1 %96 to i8
+  %98 = zext i8 %97 to i32
+  store i32 %98, i32* @ui, align 4
+  %99 = load i8, i8* @sc, align 1
+  %100 = sext i8 %99 to i16
+  %101 = zext i16 %100 to i32
+  %102 = load i8, i8* @uc, align 1
+  %103 = zext i8 %102 to i32
+  %104 = trunc i32 %103 to i8
+  %105 = trunc i32 %101 to i8
+  %pair106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
+  %106 = extractvalue { i8, i1 } %pair106, 0
+  %107 = icmp eq i8 %106, %104
+  %108 = zext i1 %107 to i8
+  %109 = zext i8 %108 to i32
+  store i32 %109, i32* @ui, align 4
+  %110 = load i8, i8* @sc, align 1
+  %111 = sext i8 %110 to i32
+  %112 = load i8, i8* @uc, align 1
+  %113 = zext i8 %112 to i32
+  %114 = trunc i32 %113 to i8
+  %115 = trunc i32 %111 to i8
+  %pair116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
+  %116 = extractvalue { i8, i1 } %pair116, 0
+  %117 = icmp eq i8 %116, %114
+  %118 = zext i1 %117 to i8
+  %119 = zext i8 %118 to i32
+  store i32 %119, i32* @ui, align 4
+  %120 = load i8, i8* @sc, align 1
+  %121 = sext i8 %120 to i32
+  %122 = load i8, i8* @uc, align 1
+  %123 = zext i8 %122 to i32
+  %124 = trunc i32 %123 to i8
+  %125 = trunc i32 %121 to i8
+  %pair126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
+  %126 = extractvalue { i8, i1 } %pair126, 0
+  %127 = icmp eq i8 %126, %124
+  %128 = zext i1 %127 to i8
+  %129 = zext i8 %128 to i32
+  store i32 %129, i32* @ui, align 4
+  %130 = load i8, i8* @sc, align 1
+  %131 = sext i8 %130 to i64
+  %132 = load i8, i8* @uc, align 1
+  %133 = zext i8 %132 to i64
+  %134 = trunc i64 %133 to i8
+  %135 = trunc i64 %131 to i8
+  %pair136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
+  %136 = extractvalue { i8, i1 } %pair136, 0
+  %137 = icmp eq i8 %136, %134
+  %138 = zext i1 %137 to i8
+  %139 = zext i8 %138 to i32
+  store i32 %139, i32* @ui, align 4
+  %140 = load i8, i8* @sc, align 1
+  %141 = sext i8 %140 to i64
+  %142 = load i8, i8* @uc, align 1
+  %143 = zext i8 %142 to i64
+  %144 = trunc i64 %143 to i8
+  %145 = trunc i64 %141 to i8
+  %pair146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
+  %146 = extractvalue { i8, i1 } %pair146, 0
+  %147 = icmp eq i8 %146, %144
+  %148 = zext i1 %147 to i8
+  %149 = zext i8 %148 to i32
+  store i32 %149, i32* @ui, align 4
+  %150 = load i8, i8* @sc, align 1
+  %151 = sext i8 %150 to i64
+  %152 = load i8, i8* @uc, align 1
+  %153 = zext i8 %152 to i64
+  %154 = trunc i64 %153 to i8
+  %155 = trunc i64 %151 to i8
+  %pair156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
+  %156 = extractvalue { i8, i1 } %pair156, 0
+  %157 = icmp eq i8 %156, %154
+  %158 = zext i1 %157 to i8
+  %159 = zext i8 %158 to i32
+  store i32 %159, i32* @ui, align 4
+  %160 = load i8, i8* @sc, align 1
+  %161 = sext i8 %160 to i64
+  %162 = load i8, i8* @uc, align 1
+  %163 = zext i8 %162 to i64
+  %164 = trunc i64 %163 to i8
+  %165 = trunc i64 %161 to i8
+  %pair166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
+  %166 = extractvalue { i8, i1 } %pair166, 0
+  %167 = icmp eq i8 %166, %164
+  %168 = zext i1 %167 to i8
+  %169 = zext i8 %168 to i32
+  store i32 %169, i32* @ui, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}

>From 2d83b289fcb0c80370bc775d3ef9fd1354105f0d Mon Sep 17 00:00:00 2001
From: csstormq <swust_xiaoqiangxu at 163.com>
Date: Wed, 22 May 2024 17:32:08 +0800
Subject: [PATCH 2/2] Fix test case

---
 .../test/CodeGen/X86/misched-critical-path.ll | 416 +++++++++---------
 1 file changed, 205 insertions(+), 211 deletions(-)

diff --git a/llvm/test/CodeGen/X86/misched-critical-path.ll b/llvm/test/CodeGen/X86/misched-critical-path.ll
index 909692aca2b0a..7db1257fb7381 100644
--- a/llvm/test/CodeGen/X86/misched-critical-path.ll
+++ b/llvm/test/CodeGen/X86/misched-critical-path.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
@@ -23,218 +24,211 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-NEXT:    SU(70): Data Latency=4 Reg=%48
 define void @misched_bug() nounwind {
 entry:
-  %0 = load i8, i8* @sc, align 1
-  %1 = zext i8 %0 to i32
-  %2 = load i8, i8* @uc, align 1
-  %3 = zext i8 %2 to i32
-  %4 = trunc i32 %3 to i8
-  %5 = trunc i32 %1 to i8
-  %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
-  %6 = extractvalue { i8, i1 } %pair6, 0
-  store i8 %6, i8* @sc, align 1
-  %7 = load i8, i8* @sc, align 1
-  %8 = zext i8 %7 to i32
-  %9 = load i8, i8* @uc, align 1
-  %10 = zext i8 %9 to i32
-  %11 = trunc i32 %10 to i8
-  %12 = trunc i32 %8 to i8
-  %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
-  %13 = extractvalue { i8, i1 } %pair13, 0
-  store i8 %13, i8* @uc, align 1
-  %14 = load i8, i8* @sc, align 1
-  %15 = sext i8 %14 to i16
-  %16 = zext i16 %15 to i32
-  %17 = load i8, i8* @uc, align 1
-  %18 = zext i8 %17 to i32
-  %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %20 = trunc i32 %18 to i16
-  %21 = trunc i32 %16 to i16
-  %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
-  %22 = extractvalue { i16, i1 } %pair22, 0
-  store i16 %22, i16* @ss, align 2
-  %23 = load i8, i8* @sc, align 1
-  %24 = sext i8 %23 to i16
-  %25 = zext i16 %24 to i32
-  %26 = load i8, i8* @uc, align 1
-  %27 = zext i8 %26 to i32
-  %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %29 = trunc i32 %27 to i16
-  %30 = trunc i32 %25 to i16
-  %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
-  %31 = extractvalue { i16, i1 } %pair31, 0
-  store i16 %31, i16* @us, align 2
-  %32 = load i8, i8* @sc, align 1
-  %33 = sext i8 %32 to i32
-  %34 = load i8, i8* @uc, align 1
-  %35 = zext i8 %34 to i32
-  %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
-  %37 = extractvalue { i32, i1 } %pair37, 0
-  store i32 %37, i32* @si, align 4
-  %38 = load i8, i8* @sc, align 1
-  %39 = sext i8 %38 to i32
-  %40 = load i8, i8* @uc, align 1
-  %41 = zext i8 %40 to i32
-  %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
-  %43 = extractvalue { i32, i1 } %pair43, 0
-  store i32 %43, i32* @ui, align 4
-  %44 = load i8, i8* @sc, align 1
-  %45 = sext i8 %44 to i64
-  %46 = load i8, i8* @uc, align 1
-  %47 = zext i8 %46 to i64
-  %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
-  %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
-  %49 = extractvalue { i64, i1 } %pair49, 0
-  store i64 %49, i64* @sl, align 8
-  %50 = load i8, i8* @sc, align 1
-  %51 = sext i8 %50 to i64
-  %52 = load i8, i8* @uc, align 1
-  %53 = zext i8 %52 to i64
-  %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
-  %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
-  %55 = extractvalue { i64, i1 } %pair55, 0
-  store i64 %55, i64* @ul, align 8
-  %56 = load i8, i8* @sc, align 1
-  %57 = sext i8 %56 to i64
-  %58 = load i8, i8* @uc, align 1
-  %59 = zext i8 %58 to i64
-  %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
-  %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
-  %61 = extractvalue { i64, i1 } %pair61, 0
-  store i64 %61, i64* @sll, align 8
-  %62 = load i8, i8* @sc, align 1
-  %63 = sext i8 %62 to i64
-  %64 = load i8, i8* @uc, align 1
-  %65 = zext i8 %64 to i64
-  %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
-  %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
-  %67 = extractvalue { i64, i1 } %pair67, 0
-  store i64 %67, i64* @ull, align 8
-  %68 = load i8, i8* @sc, align 1
-  %69 = zext i8 %68 to i32
-  %70 = load i8, i8* @uc, align 1
-  %71 = zext i8 %70 to i32
-  %72 = trunc i32 %71 to i8
-  %73 = trunc i32 %69 to i8
-  %pair74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
-  %74 = extractvalue { i8, i1 } %pair74, 0
-  %75 = icmp eq i8 %74, %72
-  %76 = zext i1 %75 to i8
-  %77 = zext i8 %76 to i32
-  store i32 %77, i32* @ui, align 4
-  %78 = load i8, i8* @sc, align 1
-  %79 = zext i8 %78 to i32
-  %80 = load i8, i8* @uc, align 1
-  %81 = zext i8 %80 to i32
-  %82 = trunc i32 %81 to i8
-  %83 = trunc i32 %79 to i8
-  %pair84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
-  %84 = extractvalue { i8, i1 } %pair84, 0
-  %85 = icmp eq i8 %84, %82
-  %86 = zext i1 %85 to i8
-  %87 = zext i8 %86 to i32
-  store i32 %87, i32* @ui, align 4
-  %88 = load i8, i8* @sc, align 1
-  %89 = sext i8 %88 to i16
-  %90 = zext i16 %89 to i32
-  %91 = load i8, i8* @uc, align 1
-  %92 = zext i8 %91 to i32
-  %93 = trunc i32 %92 to i8
-  %94 = trunc i32 %90 to i8
-  %pair95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
-  %95 = extractvalue { i8, i1 } %pair95, 0
-  %96 = icmp eq i8 %95, %93
-  %97 = zext i1 %96 to i8
-  %98 = zext i8 %97 to i32
-  store i32 %98, i32* @ui, align 4
-  %99 = load i8, i8* @sc, align 1
-  %100 = sext i8 %99 to i16
-  %101 = zext i16 %100 to i32
-  %102 = load i8, i8* @uc, align 1
-  %103 = zext i8 %102 to i32
-  %104 = trunc i32 %103 to i8
-  %105 = trunc i32 %101 to i8
-  %pair106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
-  %106 = extractvalue { i8, i1 } %pair106, 0
-  %107 = icmp eq i8 %106, %104
-  %108 = zext i1 %107 to i8
-  %109 = zext i8 %108 to i32
-  store i32 %109, i32* @ui, align 4
-  %110 = load i8, i8* @sc, align 1
-  %111 = sext i8 %110 to i32
-  %112 = load i8, i8* @uc, align 1
-  %113 = zext i8 %112 to i32
-  %114 = trunc i32 %113 to i8
-  %115 = trunc i32 %111 to i8
-  %pair116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
-  %116 = extractvalue { i8, i1 } %pair116, 0
-  %117 = icmp eq i8 %116, %114
-  %118 = zext i1 %117 to i8
-  %119 = zext i8 %118 to i32
-  store i32 %119, i32* @ui, align 4
-  %120 = load i8, i8* @sc, align 1
-  %121 = sext i8 %120 to i32
-  %122 = load i8, i8* @uc, align 1
-  %123 = zext i8 %122 to i32
-  %124 = trunc i32 %123 to i8
-  %125 = trunc i32 %121 to i8
-  %pair126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
-  %126 = extractvalue { i8, i1 } %pair126, 0
-  %127 = icmp eq i8 %126, %124
-  %128 = zext i1 %127 to i8
-  %129 = zext i8 %128 to i32
-  store i32 %129, i32* @ui, align 4
-  %130 = load i8, i8* @sc, align 1
-  %131 = sext i8 %130 to i64
-  %132 = load i8, i8* @uc, align 1
-  %133 = zext i8 %132 to i64
-  %134 = trunc i64 %133 to i8
-  %135 = trunc i64 %131 to i8
-  %pair136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
-  %136 = extractvalue { i8, i1 } %pair136, 0
-  %137 = icmp eq i8 %136, %134
-  %138 = zext i1 %137 to i8
-  %139 = zext i8 %138 to i32
-  store i32 %139, i32* @ui, align 4
-  %140 = load i8, i8* @sc, align 1
-  %141 = sext i8 %140 to i64
-  %142 = load i8, i8* @uc, align 1
-  %143 = zext i8 %142 to i64
-  %144 = trunc i64 %143 to i8
-  %145 = trunc i64 %141 to i8
-  %pair146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
-  %146 = extractvalue { i8, i1 } %pair146, 0
-  %147 = icmp eq i8 %146, %144
-  %148 = zext i1 %147 to i8
-  %149 = zext i8 %148 to i32
-  store i32 %149, i32* @ui, align 4
-  %150 = load i8, i8* @sc, align 1
-  %151 = sext i8 %150 to i64
-  %152 = load i8, i8* @uc, align 1
-  %153 = zext i8 %152 to i64
-  %154 = trunc i64 %153 to i8
-  %155 = trunc i64 %151 to i8
-  %pair156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
-  %156 = extractvalue { i8, i1 } %pair156, 0
-  %157 = icmp eq i8 %156, %154
-  %158 = zext i1 %157 to i8
-  %159 = zext i8 %158 to i32
-  store i32 %159, i32* @ui, align 4
-  %160 = load i8, i8* @sc, align 1
-  %161 = sext i8 %160 to i64
-  %162 = load i8, i8* @uc, align 1
-  %163 = zext i8 %162 to i64
-  %164 = trunc i64 %163 to i8
-  %165 = trunc i64 %161 to i8
-  %pair166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
-  %166 = extractvalue { i8, i1 } %pair166, 0
-  %167 = icmp eq i8 %166, %164
-  %168 = zext i1 %167 to i8
-  %169 = zext i8 %168 to i32
-  store i32 %169, i32* @ui, align 4
+  %v0 = load i8, ptr @sc, align 1
+  %v1 = zext i8 %v0 to i32
+  %v2 = load i8, ptr @uc, align 1
+  %v3 = zext i8 %v2 to i32
+  %v4 = trunc i32 %v3 to i8
+  %v5 = trunc i32 %v1 to i8
+  %pair6 = cmpxchg ptr @sc, i8 %v4, i8 %v5 monotonic monotonic
+  %v6 = extractvalue { i8, i1 } %pair6, 0
+  store i8 %v6, ptr @sc, align 1
+  %v7 = load i8, ptr @sc, align 1
+  %v8 = zext i8 %v7 to i32
+  %v9 = load i8, ptr @uc, align 1
+  %v10 = zext i8 %v9 to i32
+  %v11 = trunc i32 %v10 to i8
+  %v12 = trunc i32 %v8 to i8
+  %pair13 = cmpxchg ptr @uc, i8 %v11, i8 %v12 monotonic monotonic
+  %v13 = extractvalue { i8, i1 } %pair13, 0
+  store i8 %v13, ptr @uc, align 1
+  %v14 = load i8, ptr @sc, align 1
+  %v15 = sext i8 %v14 to i16
+  %v16 = zext i16 %v15 to i32
+  %v17 = load i8, ptr @uc, align 1
+  %v18 = zext i8 %v17 to i32
+  %v20 = trunc i32 %v18 to i16
+  %v21 = trunc i32 %v16 to i16
+  %pair22 = cmpxchg ptr @ss, i16 %v20, i16 %v21 monotonic monotonic
+  %v22 = extractvalue { i16, i1 } %pair22, 0
+  store i16 %v22, ptr @ss, align 2
+  %v23 = load i8, ptr @sc, align 1
+  %v24 = sext i8 %v23 to i16
+  %v25 = zext i16 %v24 to i32
+  %v26 = load i8, ptr @uc, align 1
+  %v27 = zext i8 %v26 to i32
+  %v29 = trunc i32 %v27 to i16
+  %v30 = trunc i32 %v25 to i16
+  %pair31 = cmpxchg ptr @us, i16 %v29, i16 %v30 monotonic monotonic
+  %v31 = extractvalue { i16, i1 } %pair31, 0
+  store i16 %v31, ptr @us, align 2
+  %v32 = load i8, ptr @sc, align 1
+  %v33 = sext i8 %v32 to i32
+  %v34 = load i8, ptr @uc, align 1
+  %v35 = zext i8 %v34 to i32
+  %pair37 = cmpxchg ptr @si, i32 %v35, i32 %v33 monotonic monotonic
+  %v37 = extractvalue { i32, i1 } %pair37, 0
+  store i32 %v37, ptr @si, align 4
+  %v38 = load i8, ptr @sc, align 1
+  %v39 = sext i8 %v38 to i32
+  %v40 = load i8, ptr @uc, align 1
+  %v41 = zext i8 %v40 to i32
+  %pair43 = cmpxchg ptr @ui, i32 %v41, i32 %v39 monotonic monotonic
+  %v43 = extractvalue { i32, i1 } %pair43, 0
+  store i32 %v43, ptr @ui, align 4
+  %v44 = load i8, ptr @sc, align 1
+  %v45 = sext i8 %v44 to i64
+  %v46 = load i8, ptr @uc, align 1
+  %v47 = zext i8 %v46 to i64
+  %pair49 = cmpxchg ptr @sl, i64 %v47, i64 %v45 monotonic monotonic
+  %v49 = extractvalue { i64, i1 } %pair49, 0
+  store i64 %v49, ptr @sl, align 8
+  %v50 = load i8, ptr @sc, align 1
+  %v51 = sext i8 %v50 to i64
+  %v52 = load i8, ptr @uc, align 1
+  %v53 = zext i8 %v52 to i64
+  %pair55 = cmpxchg ptr @ul, i64 %v53, i64 %v51 monotonic monotonic
+  %v55 = extractvalue { i64, i1 } %pair55, 0
+  store i64 %v55, ptr @ul, align 8
+  %v56 = load i8, ptr @sc, align 1
+  %v57 = sext i8 %v56 to i64
+  %v58 = load i8, ptr @uc, align 1
+  %v59 = zext i8 %v58 to i64
+  %pair61 = cmpxchg ptr @sll, i64 %v59, i64 %v57 monotonic monotonic
+  %v61 = extractvalue { i64, i1 } %pair61, 0
+  store i64 %v61, ptr @sll, align 8
+  %v62 = load i8, ptr @sc, align 1
+  %v63 = sext i8 %v62 to i64
+  %v64 = load i8, ptr @uc, align 1
+  %v65 = zext i8 %v64 to i64
+  %pair67 = cmpxchg ptr @ull, i64 %v65, i64 %v63 monotonic monotonic
+  %v67 = extractvalue { i64, i1 } %pair67, 0
+  store i64 %v67, ptr @ull, align 8
+  %v68 = load i8, ptr @sc, align 1
+  %v69 = zext i8 %v68 to i32
+  %v70 = load i8, ptr @uc, align 1
+  %v71 = zext i8 %v70 to i32
+  %v72 = trunc i32 %v71 to i8
+  %v73 = trunc i32 %v69 to i8
+  %pair74 = cmpxchg ptr @sc, i8 %v72, i8 %v73 monotonic monotonic
+  %v74 = extractvalue { i8, i1 } %pair74, 0
+  %v75 = icmp eq i8 %v74, %v72
+  %v76 = zext i1 %v75 to i8
+  %v77 = zext i8 %v76 to i32
+  store i32 %v77, ptr @ui, align 4
+  %v78 = load i8, ptr @sc, align 1
+  %v79 = zext i8 %v78 to i32
+  %v80 = load i8, ptr @uc, align 1
+  %v81 = zext i8 %v80 to i32
+  %v82 = trunc i32 %v81 to i8
+  %v83 = trunc i32 %v79 to i8
+  %pair84 = cmpxchg ptr @uc, i8 %v82, i8 %v83 monotonic monotonic
+  %v84 = extractvalue { i8, i1 } %pair84, 0
+  %v85 = icmp eq i8 %v84, %v82
+  %v86 = zext i1 %v85 to i8
+  %v87 = zext i8 %v86 to i32
+  store i32 %v87, ptr @ui, align 4
+  %v88 = load i8, ptr @sc, align 1
+  %v89 = sext i8 %v88 to i16
+  %v90 = zext i16 %v89 to i32
+  %v91 = load i8, ptr @uc, align 1
+  %v92 = zext i8 %v91 to i32
+  %v93 = trunc i32 %v92 to i8
+  %v94 = trunc i32 %v90 to i8
+  %pair95 = cmpxchg ptr @ss, i8 %v93, i8 %v94 monotonic monotonic
+  %v95 = extractvalue { i8, i1 } %pair95, 0
+  %v96 = icmp eq i8 %v95, %v93
+  %v97 = zext i1 %v96 to i8
+  %v98 = zext i8 %v97 to i32
+  store i32 %v98, ptr @ui, align 4
+  %v99 = load i8, ptr @sc, align 1
+  %v100 = sext i8 %v99 to i16
+  %v101 = zext i16 %v100 to i32
+  %v102 = load i8, ptr @uc, align 1
+  %v103 = zext i8 %v102 to i32
+  %v104 = trunc i32 %v103 to i8
+  %v105 = trunc i32 %v101 to i8
+  %pair106 = cmpxchg ptr @us, i8 %v104, i8 %v105 monotonic monotonic
+  %v106 = extractvalue { i8, i1 } %pair106, 0
+  %v107 = icmp eq i8 %v106, %v104
+  %v108 = zext i1 %v107 to i8
+  %v109 = zext i8 %v108 to i32
+  store i32 %v109, ptr @ui, align 4
+  %v110 = load i8, ptr @sc, align 1
+  %v111 = sext i8 %v110 to i32
+  %v112 = load i8, ptr @uc, align 1
+  %v113 = zext i8 %v112 to i32
+  %v114 = trunc i32 %v113 to i8
+  %v115 = trunc i32 %v111 to i8
+  %pair116 = cmpxchg ptr @si, i8 %v114, i8 %v115 monotonic monotonic
+  %v116 = extractvalue { i8, i1 } %pair116, 0
+  %v117 = icmp eq i8 %v116, %v114
+  %v118 = zext i1 %v117 to i8
+  %v119 = zext i8 %v118 to i32
+  store i32 %v119, ptr @ui, align 4
+  %v120 = load i8, ptr @sc, align 1
+  %v121 = sext i8 %v120 to i32
+  %v122 = load i8, ptr @uc, align 1
+  %v123 = zext i8 %v122 to i32
+  %v124 = trunc i32 %v123 to i8
+  %v125 = trunc i32 %v121 to i8
+  %pair126 = cmpxchg ptr @ui, i8 %v124, i8 %v125 monotonic monotonic
+  %v126 = extractvalue { i8, i1 } %pair126, 0
+  %v127 = icmp eq i8 %v126, %v124
+  %v128 = zext i1 %v127 to i8
+  %v129 = zext i8 %v128 to i32
+  store i32 %v129, ptr @ui, align 4
+  %v130 = load i8, ptr @sc, align 1
+  %v131 = sext i8 %v130 to i64
+  %v132 = load i8, ptr @uc, align 1
+  %v133 = zext i8 %v132 to i64
+  %v134 = trunc i64 %v133 to i8
+  %v135 = trunc i64 %v131 to i8
+  %pair136 = cmpxchg ptr @sl, i8 %v134, i8 %v135 monotonic monotonic
+  %v136 = extractvalue { i8, i1 } %pair136, 0
+  %v137 = icmp eq i8 %v136, %v134
+  %v138 = zext i1 %v137 to i8
+  %v139 = zext i8 %v138 to i32
+  store i32 %v139, ptr @ui, align 4
+  %v140 = load i8, ptr @sc, align 1
+  %v141 = sext i8 %v140 to i64
+  %v142 = load i8, ptr @uc, align 1
+  %v143 = zext i8 %v142 to i64
+  %v144 = trunc i64 %v143 to i8
+  %v145 = trunc i64 %v141 to i8
+  %pair146 = cmpxchg ptr @ul, i8 %v144, i8 %v145 monotonic monotonic
+  %v146 = extractvalue { i8, i1 } %pair146, 0
+  %v147 = icmp eq i8 %v146, %v144
+  %v148 = zext i1 %v147 to i8
+  %v149 = zext i8 %v148 to i32
+  store i32 %v149, ptr @ui, align 4
+  %v150 = load i8, ptr @sc, align 1
+  %v151 = sext i8 %v150 to i64
+  %v152 = load i8, ptr @uc, align 1
+  %v153 = zext i8 %v152 to i64
+  %v154 = trunc i64 %v153 to i8
+  %v155 = trunc i64 %v151 to i8
+  %pair156 = cmpxchg ptr @sll, i8 %v154, i8 %v155 monotonic monotonic
+  %v156 = extractvalue { i8, i1 } %pair156, 0
+  %v157 = icmp eq i8 %v156, %v154
+  %v158 = zext i1 %v157 to i8
+  %v159 = zext i8 %v158 to i32
+  store i32 %v159, ptr @ui, align 4
+  %v160 = load i8, ptr @sc, align 1
+  %v161 = sext i8 %v160 to i64
+  %v162 = load i8, ptr @uc, align 1
+  %v163 = zext i8 %v162 to i64
+  %v164 = trunc i64 %v163 to i8
+  %v165 = trunc i64 %v161 to i8
+  %pair166 = cmpxchg ptr @ull, i8 %v164, i8 %v165 monotonic monotonic
+  %v166 = extractvalue { i8, i1 } %pair166, 0
+  %v167 = icmp eq i8 %v166, %v164
+  %v168 = zext i1 %v167 to i8
+  %v169 = zext i8 %v168 to i32
+  store i32 %v169, ptr @ui, align 4
   br label %return
 
-return:                                           ; preds = %entry
+return:                                           ; preds = %entry
   ret void
 }
+



More information about the llvm-commits mailing list